Performance improvements in SEEK_DATA and xattr scalability
improvements, plus a lot of clean ups and bug fixes.

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQEcBAABCAAGBQJW6c9mAAoJEPL5WVaVDYGjWsEIAJkWUvKB3GgGgP82sKDBP2P8
IbWegO1ICMrSY78BqLI7mLCqggH5JClBgYU3O4VFv8Brj1L9mS5X+vflaDE1j9jj
Ik1KZKtZl1opOwO1L3D4l/ipZAiENUp7NehTtpsFousmz6nMZ5vo6x4t3QSwbUIm
YXpxUIxHEhBcW5i3EDkfYG8305V5oj8HsVf6T98OlWGpBO5VGNMAHvA7CQdQe7Rd
chv70rij5V684bJAEoosEFXVAuOUrxcBqbFA3Nlb432YOPj0ISLx76kw0GIjUYtf
yjoSClbRgwxGzh0jm+yaoYjjm83xbsYbHSsBmh3+/QLMbKTLXeCqR/BiqJavmcM=
=bWpz
-----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Performance improvements in SEEK_DATA and xattr scalability
  improvements, plus a lot of clean ups and bug fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (38 commits)
  ext4: clean up error handling in the MMP support
  jbd2: do not fail journal because of frozen_buffer allocation failure
  ext4: use __GFP_NOFAIL in ext4_free_blocks()
  ext4: fix compile error while opening the macro DOUBLE_CHECK
  ext4: print ext4 mount option data_err=abort correctly
  ext4: fix NULL pointer dereference in ext4_mark_inode_dirty()
  ext4: drop unneeded BUFFER_TRACE in ext4_delete_inline_entry()
  ext4: fix misspellings in comments.
  jbd2: fix FS corruption possibility in jbd2_journal_destroy() on umount path
  ext4: more efficient SEEK_DATA implementation
  ext4: cleanup handling of bh->b_state in DAX mmap
  ext4: return hole from ext4_map_blocks()
  ext4: factor out determining of hole size
  ext4: fix setting of referenced bit in ext4_es_lookup_extent()
  ext4: remove i_ioend_count
  ext4: simplify io_end handling for AIO DIO
  ext4: move trans handling and completion deferal out of _ext4_get_block
  ext4: rename and split get blocks functions
  ext4: use i_mutex to serialize unaligned AIO DIO
  ext4: pack ioend structure better
  ...
commit faeb20ecfa
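As context for the "more efficient SEEK_DATA" work listed above: the feature is exposed to userspace through lseek(2). A minimal, illustrative probe of that interface (not part of the merge; the file path is a placeholder and error handling is condensed):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical sparse file; any file with holes works */
	int fd = open("/tmp/sparse-file", O_RDONLY);
	off_t data, hole;

	if (fd < 0)
		return 1;
	data = lseek(fd, 0, SEEK_DATA);	/* first data byte at/after offset 0 */
	hole = lseek(fd, 0, SEEK_HOLE);	/* first hole at/after offset 0 */
	printf("data at %lld, hole at %lld\n", (long long)data, (long long)hole);
	close(fd);
	return 0;
}

The ext4 changes in this merge make the SEEK_DATA lookup walk extents rather than probing one block at a time, as the fs/ext4/file.c hunks below show.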
fs/ext2/ext2.h

@@ -61,6 +61,8 @@ struct ext2_block_alloc_info {
 #define rsv_start rsv_window._rsv_start
 #define rsv_end rsv_window._rsv_end
 
+struct mb_cache;
+
 /*
  * second extended-fs super-block data in memory
  */

@@ -111,6 +113,7 @@ struct ext2_sb_info {
 	 * of the mount options.
 	 */
 	spinlock_t s_lock;
+	struct mb_cache *s_mb_cache;
 };
 
 static inline spinlock_t *
fs/ext2/super.c

@@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb)
 
 	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
-	ext2_xattr_put_super(sb);
+	if (sbi->s_mb_cache) {
+		ext2_xattr_destroy_cache(sbi->s_mb_cache);
+		sbi->s_mb_cache = NULL;
+	}
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
 

@@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		ext2_msg(sb, KERN_ERR, "error: insufficient memory");
 		goto failed_mount3;
 	}
+
+#ifdef CONFIG_EXT2_FS_XATTR
+	sbi->s_mb_cache = ext2_xattr_create_cache();
+	if (!sbi->s_mb_cache) {
+		ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+		goto failed_mount3;
+	}
+#endif
 	/*
 	 * set up enough so that it can read an inode
 	 */

@@ -1149,6 +1160,8 @@ cantfind_ext2:
 		   sb->s_id);
 	goto failed_mount;
 failed_mount3:
+	if (sbi->s_mb_cache)
+		ext2_xattr_destroy_cache(sbi->s_mb_cache);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);

@@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2");
 
 static int __init init_ext2_fs(void)
 {
-	int err = init_ext2_xattr();
-	if (err)
-		return err;
+	int err;
+
 	err = init_inodecache();
 	if (err)
-		goto out1;
+		return err;
 	err = register_filesystem(&ext2_fs_type);
 	if (err)
 		goto out;
 	return 0;
 out:
 	destroy_inodecache();
-out1:
-	exit_ext2_xattr();
 	return err;
 }
 

@@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void)
 {
 	unregister_filesystem(&ext2_fs_type);
 	destroy_inodecache();
-	exit_ext2_xattr();
 }
 
 MODULE_AUTHOR("Remy Card and others");
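A condensed, illustrative restatement of the lifecycle the super.c hunks above establish (sketch only, not code from the merge): the xattr block cache moves from one module-global object created at module init to one mb_cache instance per mounted filesystem. The helper names below are hypothetical; the ext2 structures and the create/destroy functions are the ones shown in the diff.

/* sketch, assuming the ext2 structures and helpers from the hunks above */
static int example_attach_cache(struct ext2_sb_info *sbi)	/* hypothetical */
{
	sbi->s_mb_cache = ext2_xattr_create_cache();
	if (!sbi->s_mb_cache)
		return -ENOMEM;		/* the mount fails cleanly */
	return 0;
}

static void example_detach_cache(struct ext2_sb_info *sbi)	/* hypothetical */
{
	if (sbi->s_mb_cache) {
		ext2_xattr_destroy_cache(sbi->s_mb_cache);
		sbi->s_mb_cache = NULL;
	}
}

One visible consequence: module init/exit no longer touch the cache at all, and the failed_mount3 error path must tear the per-superblock cache down.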
fs/ext2/xattr.c (139 lines changed)

@@ -90,14 +90,12 @@
 static int ext2_xattr_set2(struct inode *, struct buffer_head *,
			   struct ext2_xattr_header *);
 
-static int ext2_xattr_cache_insert(struct buffer_head *);
+static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
 static struct buffer_head *ext2_xattr_cache_find(struct inode *,
						 struct ext2_xattr_header *);
 static void ext2_xattr_rehash(struct ext2_xattr_header *,
			      struct ext2_xattr_entry *);
 
-static struct mb_cache *ext2_xattr_cache;
-
 static const struct xattr_handler *ext2_xattr_handler_map[] = {
	[EXT2_XATTR_INDEX_USER]		 = &ext2_xattr_user_handler,
 #ifdef CONFIG_EXT2_FS_POSIX_ACL

@@ -152,6 +150,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
 	size_t name_len, size;
 	char *end;
 	int error;
+	struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
 
 	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
 		  name_index, name, buffer, (long)buffer_size);

@@ -196,7 +195,7 @@ bad_block:	ext2_error(inode->i_sb, "ext2_xattr_get",
 			goto found;
 		entry = next;
 	}
-	if (ext2_xattr_cache_insert(bh))
+	if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
 		ea_idebug(inode, "cache insert failed");
 	error = -ENODATA;
 	goto cleanup;

@@ -209,7 +208,7 @@ found:
 	    le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
 		goto bad_block;
 
-	if (ext2_xattr_cache_insert(bh))
+	if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
 		ea_idebug(inode, "cache insert failed");
 	if (buffer) {
 		error = -ERANGE;

@@ -247,6 +246,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	char *end;
 	size_t rest = buffer_size;
 	int error;
+	struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
 
 	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
 		  buffer, (long)buffer_size);

@@ -281,7 +281,7 @@ bad_block:	ext2_error(inode->i_sb, "ext2_xattr_list",
 			goto bad_block;
 		entry = next;
 	}
-	if (ext2_xattr_cache_insert(bh))
+	if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
 		ea_idebug(inode, "cache insert failed");
 
 	/* list the attribute names */

@@ -483,22 +483,23 @@ bad_block:		ext2_error(sb, "ext2_xattr_set",
 	/* Here we know that we can set the new attribute. */
 
 	if (header) {
-		struct mb_cache_entry *ce;
-
 		/* assert(header == HDR(bh)); */
-		ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
-					bh->b_blocknr);
 		lock_buffer(bh);
 		if (header->h_refcount == cpu_to_le32(1)) {
+			__u32 hash = le32_to_cpu(header->h_hash);
+
 			ea_bdebug(bh, "modifying in-place");
-			if (ce)
-				mb_cache_entry_free(ce);
+			/*
+			 * This must happen under buffer lock for
+			 * ext2_xattr_set2() to reliably detect modified block
+			 */
+			mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
+						    hash, bh->b_blocknr);
+
 			/* keep the buffer locked while modifying it. */
 		} else {
 			int offset;
 
-			if (ce)
-				mb_cache_entry_release(ce);
 			unlock_buffer(bh);
 			ea_bdebug(bh, "cloning");
 			header = kmalloc(bh->b_size, GFP_KERNEL);

@@ -626,6 +627,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *new_bh = NULL;
 	int error;
+	struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
 
 	if (header) {
 		new_bh = ext2_xattr_cache_find(inode, header);

@@ -653,7 +655,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			   don't need to change the reference count. */
 			new_bh = old_bh;
 			get_bh(new_bh);
-			ext2_xattr_cache_insert(new_bh);
+			ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
 		} else {
 			/* We need to allocate a new block */
 			ext2_fsblk_t goal = ext2_group_first_block_no(sb,

@@ -674,7 +676,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 		memcpy(new_bh->b_data, header, new_bh->b_size);
 		set_buffer_uptodate(new_bh);
 		unlock_buffer(new_bh);
-		ext2_xattr_cache_insert(new_bh);
+		ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
 
 		ext2_xattr_update_super_block(sb);
 	}

@@ -707,19 +709,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 
 	error = 0;
 	if (old_bh && old_bh != new_bh) {
-		struct mb_cache_entry *ce;
-
 		/*
 		 * If there was an old block and we are no longer using it,
 		 * release the old block.
 		 */
-		ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
-					old_bh->b_blocknr);
 		lock_buffer(old_bh);
 		if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+			__u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
+
+			/*
+			 * This must happen under buffer lock for
+			 * ext2_xattr_set2() to reliably detect freed block
+			 */
+			mb_cache_entry_delete_block(ext2_mb_cache,
+						    hash, old_bh->b_blocknr);
 			/* Free the old block. */
-			if (ce)
-				mb_cache_entry_free(ce);
 			ea_bdebug(old_bh, "freeing");
 			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
 			mark_inode_dirty(inode);

@@ -730,8 +734,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 		} else {
 			/* Decrement the refcount only. */
 			le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
-			if (ce)
-				mb_cache_entry_release(ce);
 			dquot_free_block_nodirty(inode, 1);
 			mark_inode_dirty(inode);
 			mark_buffer_dirty(old_bh);

@@ -757,7 +759,6 @@ void
 ext2_xattr_delete_inode(struct inode *inode)
 {
 	struct buffer_head *bh = NULL;
-	struct mb_cache_entry *ce;
 
 	down_write(&EXT2_I(inode)->xattr_sem);
 	if (!EXT2_I(inode)->i_file_acl)

@@ -777,19 +778,22 @@ ext2_xattr_delete_inode(struct inode *inode)
 			   EXT2_I(inode)->i_file_acl);
 		goto cleanup;
 	}
-	ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
 	lock_buffer(bh);
 	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
-		if (ce)
-			mb_cache_entry_free(ce);
+		__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+
+		/*
+		 * This must happen under buffer lock for ext2_xattr_set2() to
+		 * reliably detect freed block
+		 */
+		mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
+					    hash, bh->b_blocknr);
 		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
 		get_bh(bh);
 		bforget(bh);
 		unlock_buffer(bh);
 	} else {
 		le32_add_cpu(&HDR(bh)->h_refcount, -1);
-		if (ce)
-			mb_cache_entry_release(ce);
 		ea_bdebug(bh, "refcount now=%d",
 			  le32_to_cpu(HDR(bh)->h_refcount));
 		unlock_buffer(bh);

@@ -805,18 +809,6 @@ cleanup:
 	up_write(&EXT2_I(inode)->xattr_sem);
 }
 
-/*
- * ext2_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext2_xattr_put_super(struct super_block *sb)
-{
-	mb_cache_shrink(sb->s_bdev);
-}
-
-
 /*
  * ext2_xattr_cache_insert()
  *

@@ -826,28 +818,20 @@ ext2_xattr_put_super(struct super_block *sb)
  * Returns 0, or a negative error number on failure.
  */
 static int
-ext2_xattr_cache_insert(struct buffer_head *bh)
+ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
 {
 	__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
-	struct mb_cache_entry *ce;
 	int error;
 
-	ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
-	if (!ce)
-		return -ENOMEM;
-	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+	error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1);
 	if (error) {
-		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
 			ea_bdebug(bh, "already in cache (%d cache entries)",
				atomic_read(&ext2_xattr_cache->c_entry_count));
 			error = 0;
 		}
-	} else {
-		ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
-			  atomic_read(&ext2_xattr_cache->c_entry_count));
-		mb_cache_entry_release(ce);
-	}
+	} else
+		ea_bdebug(bh, "inserting [%x]", (int)hash);
 	return error;
 }

@@ -904,22 +888,16 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
 {
 	__u32 hash = le32_to_cpu(header->h_hash);
 	struct mb_cache_entry *ce;
+	struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
 
 	if (!header->h_hash)
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
-				       hash);
+	ce = mb_cache_entry_find_first(ext2_mb_cache, hash);
 	while (ce) {
 		struct buffer_head *bh;
 
-		if (IS_ERR(ce)) {
-			if (PTR_ERR(ce) == -EAGAIN)
-				goto again;
-			break;
-		}
-
 		bh = sb_bread(inode->i_sb, ce->e_block);
 		if (!bh) {
 			ext2_error(inode->i_sb, "ext2_xattr_cache_find",

@@ -927,7 +905,21 @@ again:
 				   inode->i_ino, (unsigned long) ce->e_block);
 		} else {
 			lock_buffer(bh);
-			if (le32_to_cpu(HDR(bh)->h_refcount) >
+			/*
+			 * We have to be careful about races with freeing or
+			 * rehashing of xattr block. Once we hold buffer lock
+			 * xattr block's state is stable so we can check
+			 * whether the block got freed / rehashed or not.
+			 * Since we unhash mbcache entry under buffer lock when
+			 * freeing / rehashing xattr block, checking whether
+			 * entry is still hashed is reliable.
+			 */
+			if (hlist_bl_unhashed(&ce->e_hash_list)) {
+				mb_cache_entry_put(ext2_mb_cache, ce);
+				unlock_buffer(bh);
+				brelse(bh);
+				goto again;
+			} else if (le32_to_cpu(HDR(bh)->h_refcount) >
 				   EXT2_XATTR_REFCOUNT_MAX) {
 				ea_idebug(inode, "block %ld refcount %d>%d",
 					  (unsigned long) ce->e_block,

@@ -936,13 +928,14 @@ again:
 			} else if (!ext2_xattr_cmp(header, HDR(bh))) {
 				ea_bdebug(bh, "b_count=%d",
 					  atomic_read(&(bh->b_count)));
-				mb_cache_entry_release(ce);
+				mb_cache_entry_touch(ext2_mb_cache, ce);
+				mb_cache_entry_put(ext2_mb_cache, ce);
 				return bh;
 			}
 			unlock_buffer(bh);
 			brelse(bh);
 		}
-		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+		ce = mb_cache_entry_find_next(ext2_mb_cache, ce);
 	}
 	return NULL;
 }
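The lookup side of the new mbcache API, as exercised by ext2_xattr_cache_find() above, is a find_first/find_next iteration in which each step consumes the previous entry's reference. A compacted, illustrative sketch of that consumer pattern (not merge code; block_matches() is a placeholder for the real header comparison):

/* sketch only; mirrors the loop structure in ext2_xattr_cache_find() */
static struct buffer_head *find_cached_block(struct super_block *sb,
					     struct mb_cache *cache, __u32 hash)
{
	struct mb_cache_entry *ce;

	for (ce = mb_cache_entry_find_first(cache, hash);
	     ce != NULL;
	     ce = mb_cache_entry_find_next(cache, ce)) {
		struct buffer_head *bh = sb_bread(sb, ce->e_block);

		if (bh && block_matches(bh)) {		/* placeholder check */
			mb_cache_entry_touch(cache, ce);	/* keep hot in LRU */
			mb_cache_entry_put(cache, ce);		/* drop our ref */
			return bh;
		}
		brelse(bh);
	}
	return NULL;
}

Note how the real code additionally takes the buffer lock and re-checks hlist_bl_unhashed() to catch a block that was freed or rehashed between the cache lookup and the read.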
@@ -1015,17 +1008,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
 
 #undef BLOCK_HASH_SHIFT
 
-int __init
-init_ext2_xattr(void)
+#define HASH_BUCKET_BITS 10
+
+struct mb_cache *ext2_xattr_create_cache(void)
 {
-	ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
-	if (!ext2_xattr_cache)
-		return -ENOMEM;
-	return 0;
+	return mb_cache_create(HASH_BUCKET_BITS);
 }
 
-void
-exit_ext2_xattr(void)
+void ext2_xattr_destroy_cache(struct mb_cache *cache)
 {
-	mb_cache_destroy(ext2_xattr_cache);
+	if (cache)
+		mb_cache_destroy(cache);
 }
fs/ext2/xattr.h

@@ -53,6 +53,8 @@ struct ext2_xattr_entry {
 #define EXT2_XATTR_SIZE(size) \
 	(((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
 
+struct mb_cache;
+
 # ifdef CONFIG_EXT2_FS_XATTR
 
 extern const struct xattr_handler ext2_xattr_user_handler;

@@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
 
 extern void ext2_xattr_delete_inode(struct inode *);
-extern void ext2_xattr_put_super(struct super_block *);
 
-extern int init_ext2_xattr(void);
-extern void exit_ext2_xattr(void);
+extern struct mb_cache *ext2_xattr_create_cache(void);
+extern void ext2_xattr_destroy_cache(struct mb_cache *cache);
 
 extern const struct xattr_handler *ext2_xattr_handlers[];
 

@@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode)
 {
 }
 
-static inline void
-ext2_xattr_put_super(struct super_block *sb)
-{
-}
-
-static inline int
-init_ext2_xattr(void)
-{
-	return 0;
-}
-
-static inline void
-exit_ext2_xattr(void)
+static inline void ext2_xattr_destroy_cache(struct mb_cache *cache)
 {
 }
 
fs/ext4/ext4.h

@@ -41,6 +41,18 @@
  * The fourth extended filesystem constants/structures
  */
 
+/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+
 /*
  * Define EXT4FS_DEBUG to produce debug messages
  */

@@ -182,9 +194,9 @@ typedef struct ext4_io_end {
 	struct bio		*bio;		/* Linked list of completed
 						 * bios covering the extent */
 	unsigned int		flag;		/* unwritten or not */
+	atomic_t		count;		/* reference counter */
 	loff_t			offset;		/* offset in the file */
 	ssize_t			size;		/* size of the extent */
-	atomic_t		count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {

@@ -1024,13 +1036,8 @@ struct ext4_inode_info {
 	 * transaction reserved
 	 */
 	struct list_head i_rsv_conversion_list;
-	/*
-	 * Completed IOs that need unwritten extents handling and don't have
-	 * transaction reserved
-	 */
-	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
-	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
 	struct work_struct i_rsv_conversion_work;
+	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
 
 	spinlock_t i_block_reservation_lock;
 

@@ -1513,16 +1520,6 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 	}
 }
 
-static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
-{
-	return inode->i_private;
-}
-
-static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
-{
-	inode->i_private = io;
-}
-
 /*
  * Inode dynamic state flags
  */

@@ -2506,12 +2503,14 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 int ext4_inode_is_fast_symlink(struct inode *inode);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
			 struct buffer_head *bh_result, int create);
 int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
			    struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int create);
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+		       struct buffer_head *bh_result, int create);
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh, int create);
 int ext4_walk_page_buffers(handle_t *handle,

@@ -2559,6 +2558,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
					 int used, int quota_claim);
 extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
			      ext4_fsblk_t pblk, ext4_lblk_t len);
+extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+				unsigned int map_len,
+				struct extent_status *result);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,

@@ -3285,10 +3287,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
 #define EXT4_WQ_HASH_SZ		37
 #define ext4_ioend_wq(v)	(&ext4__ioend_wq[((unsigned long)(v)) %\
					 EXT4_WQ_HASH_SZ])
-#define ext4_aio_mutex(v)	(&ext4__aio_mutex[((unsigned long)(v)) %\
-					  EXT4_WQ_HASH_SZ])
 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 
 #define EXT4_RESIZING	0
 extern int ext4_resize_begin(struct super_block *sb);
fs/ext4/ext4_extents.h

@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
  */

fs/ext4/extents.c

@@ -15,7 +15,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
  */
@@ -1736,6 +1736,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 */
 	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
+	/*
+	 * The check for IO to unwritten extent is somewhat racy as we
+	 * increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
+	 * dropping i_data_sem. But reserved blocks should save us in that
+	 * case.
+	 */
 	if (ext4_ext_is_unwritten(ex1) &&
 	    (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
 	     atomic_read(&EXT4_I(inode)->i_unwritten) ||
@@ -2292,60 +2298,70 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 	return err;
 }
 
+/*
+ * ext4_ext_determine_hole - determine hole around given block
+ * @inode:	inode we lookup in
+ * @path:	path in extent tree to @lblk
+ * @lblk:	pointer to logical block around which we want to determine hole
+ *
+ * Determine hole length (and start if easily possible) around given logical
+ * block. We don't try too hard to find the beginning of the hole but @path
+ * actually points to extent before @lblk, we provide it.
+ *
+ * The function returns the length of a hole starting at @lblk. We update @lblk
+ * to the beginning of the hole if we managed to find it.
+ */
+static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
+					   struct ext4_ext_path *path,
+					   ext4_lblk_t *lblk)
+{
+	int depth = ext_depth(inode);
+	struct ext4_extent *ex;
+	ext4_lblk_t len;
+
+	ex = path[depth].p_ext;
+	if (ex == NULL) {
+		/* there is no extent yet, so gap is [0;-] */
+		*lblk = 0;
+		len = EXT_MAX_BLOCKS;
+	} else if (*lblk < le32_to_cpu(ex->ee_block)) {
+		len = le32_to_cpu(ex->ee_block) - *lblk;
+	} else if (*lblk >= le32_to_cpu(ex->ee_block)
+			+ ext4_ext_get_actual_len(ex)) {
+		ext4_lblk_t next;
+
+		*lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
+		next = ext4_ext_next_allocated_block(path);
+		BUG_ON(next == *lblk);
+		len = next - *lblk;
+	} else {
+		BUG();
+	}
+	return len;
+}
+
 /*
  * ext4_ext_put_gap_in_cache:
  * calculate boundaries of the gap that the requested block fits into
  * and cache this gap
  */
 static void
-ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
-			  ext4_lblk_t block)
+ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
+			  ext4_lblk_t hole_len)
 {
-	int depth = ext_depth(inode);
-	ext4_lblk_t len;
-	ext4_lblk_t lblock;
-	struct ext4_extent *ex;
 	struct extent_status es;
 
-	ex = path[depth].p_ext;
-	if (ex == NULL) {
-		/* there is no extent yet, so gap is [0;-] */
-		lblock = 0;
-		len = EXT_MAX_BLOCKS;
-		ext_debug("cache gap(whole file):");
-	} else if (block < le32_to_cpu(ex->ee_block)) {
-		lblock = block;
-		len = le32_to_cpu(ex->ee_block) - block;
-		ext_debug("cache gap(before): %u [%u:%u]",
-				block,
-				le32_to_cpu(ex->ee_block),
-				ext4_ext_get_actual_len(ex));
-	} else if (block >= le32_to_cpu(ex->ee_block)
-			+ ext4_ext_get_actual_len(ex)) {
-		ext4_lblk_t next;
-		lblock = le32_to_cpu(ex->ee_block)
-			+ ext4_ext_get_actual_len(ex);
-
-		next = ext4_ext_next_allocated_block(path);
-		ext_debug("cache gap(after): [%u:%u] %u",
-				le32_to_cpu(ex->ee_block),
-				ext4_ext_get_actual_len(ex),
-				block);
-		BUG_ON(next == lblock);
-		len = next - lblock;
-	} else {
-		BUG();
-	}
-
-	ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es);
+	ext4_es_find_delayed_extent_range(inode, hole_start,
+					  hole_start + hole_len - 1, &es);
 	if (es.es_len) {
 		/* There's delayed extent containing lblock? */
-		if (es.es_lblk <= lblock)
+		if (es.es_lblk <= hole_start)
 			return;
-		len = min(es.es_lblk - lblock, len);
+		hole_len = min(es.es_lblk - hole_start, hole_len);
 	}
-	ext_debug(" -> %u:%u\n", lblock, len);
-	ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE);
+	ext_debug(" -> %u:%u\n", hole_start, hole_len);
+	ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
+			      EXTENT_STATUS_HOLE);
 }
 
 /*
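For intuition, the case analysis in ext4_ext_determine_hole() above, redone over a single toy extent in plain, standalone C (illustrative only; all names here are hypothetical):

#include <assert.h>
#include <stdio.h>

struct toy_extent { unsigned start, len; };	/* one allocated extent */

/* hole length around *lblk; may move *lblk back to the hole's start */
static unsigned toy_determine_hole(const struct toy_extent *ex,
				   unsigned next_alloc, unsigned *lblk)
{
	if (*lblk < ex->start)			/* hole before the extent */
		return ex->start - *lblk;
	assert(*lblk >= ex->start + ex->len);	/* inside the extent: no hole */
	*lblk = ex->start + ex->len;		/* hole starts right after it */
	return next_alloc - *lblk;		/* runs to next allocated block */
}

int main(void)
{
	struct toy_extent ex = { .start = 100, .len = 50 };	/* blocks 100..149 */
	unsigned lblk, len;

	lblk = 10;				/* lookup before the extent */
	len = toy_determine_hole(&ex, 500, &lblk);
	printf("hole at %u, %u blocks\n", lblk, len);	/* hole at 10, 90 blocks */

	lblk = 160;				/* lookup after the extent */
	len = toy_determine_hole(&ex, 500, &lblk);
	printf("hole at %u, %u blocks\n", lblk, len);	/* hole at 150, 350 blocks */
	return 0;
}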
@@ -3927,7 +3943,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 static int
 convert_initialized_extent(handle_t *handle, struct inode *inode,
			   struct ext4_map_blocks *map,
-			   struct ext4_ext_path **ppath, int flags,
+			   struct ext4_ext_path **ppath,
			   unsigned int allocated)
 {
 	struct ext4_ext_path *path = *ppath;

@@ -4007,7 +4023,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 	struct ext4_ext_path *path = *ppath;
 	int ret = 0;
 	int err = 0;
-	ext4_io_end_t *io = ext4_inode_aio(inode);
 
 	ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
		  "block %llu, max_blocks %u, flags %x, allocated %u\n",

@@ -4030,15 +4045,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
						flags | EXT4_GET_BLOCKS_CONVERT);
 		if (ret <= 0)
 			goto out;
-		/*
-		 * Flag the inode(non aio case) or end_io struct (aio case)
-		 * that this IO needs to conversion to written when IO is
-		 * completed
-		 */
-		if (io)
-			ext4_set_io_unwritten_flag(inode, io);
-		else
-			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 		map->m_flags |= EXT4_MAP_UNWRITTEN;
 		goto out;
 	}

@@ -4283,9 +4289,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
 	struct ext4_allocation_request ar;
-	ext4_io_end_t *io = ext4_inode_aio(inode);
 	ext4_lblk_t cluster_offset;
-	int set_unwritten = 0;
 	bool map_from_cluster = false;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",

@@ -4347,7 +4351,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
		    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
			allocated = convert_initialized_extent(
						handle, inode, map, &path,
-						flags, allocated);
+						allocated);
			goto out2;
		} else if (!ext4_ext_is_unwritten(ex))
			goto out;
@@ -4368,11 +4372,22 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
	 * we couldn't try to create block if create flag is zero
	 */
	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+		ext4_lblk_t hole_start, hole_len;
+
+		hole_start = map->m_lblk;
+		hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
		/*
		 * put just found gap into cache to speed up
		 * subsequent requests
		 */
-		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+		ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+
+		/* Update hole_len to reflect hole size after map->m_lblk */
+		if (hole_start != map->m_lblk)
+			hole_len -= map->m_lblk - hole_start;
+		map->m_pblk = 0;
+		map->m_len = min_t(unsigned int, map->m_len, hole_len);
+
		goto out2;
	}
 
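The trimming in the hunk above, worked out in numbers (illustrative only): the whole hole goes into the extent-status cache, but the caller's map reports just the part at or after the block it asked about.

#include <stdio.h>

int main(void)
{
	unsigned hole_start = 90, hole_len = 60;  /* determine_hole found [90, 150) */
	unsigned m_lblk = 100, m_len = 256;       /* the lookup was for block 100 */

	hole_len -= m_lblk - hole_start;          /* 60 - 10 = 50 blocks remain */
	if (m_len > hole_len)
		m_len = hole_len;                 /* min(256, 50) = 50 */
	printf("m_len = %u\n", m_len);            /* prints: m_len = 50 */
	return 0;
}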
|
@ -4482,15 +4497,6 @@ got_allocated_blocks:
|
||||||
if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
|
if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
|
||||||
ext4_ext_mark_unwritten(&newex);
|
ext4_ext_mark_unwritten(&newex);
|
||||||
map->m_flags |= EXT4_MAP_UNWRITTEN;
|
map->m_flags |= EXT4_MAP_UNWRITTEN;
|
||||||
/*
|
|
||||||
* io_end structure was created for every IO write to an
|
|
||||||
* unwritten extent. To avoid unnecessary conversion,
|
|
||||||
* here we flag the IO that really needs the conversion.
|
|
||||||
* For non asycn direct IO case, flag the inode state
|
|
||||||
* that we need to perform conversion when IO is done.
|
|
||||||
*/
|
|
||||||
if (flags & EXT4_GET_BLOCKS_PRE_IO)
|
|
||||||
set_unwritten = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
err = 0;
|
err = 0;
|
||||||
|
@ -4501,14 +4507,6 @@ got_allocated_blocks:
|
||||||
err = ext4_ext_insert_extent(handle, inode, &path,
|
err = ext4_ext_insert_extent(handle, inode, &path,
|
||||||
&newex, flags);
|
&newex, flags);
|
||||||
|
|
||||||
if (!err && set_unwritten) {
|
|
||||||
if (io)
|
|
||||||
ext4_set_io_unwritten_flag(inode, io);
|
|
||||||
else
|
|
||||||
ext4_set_inode_state(inode,
|
|
||||||
EXT4_STATE_DIO_UNWRITTEN);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err && free_on_err) {
|
if (err && free_on_err) {
|
||||||
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
|
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
|
||||||
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
|
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
|
||||||
|
|
|
fs/ext4/extents_status.c

@@ -823,8 +823,8 @@ out:
		es->es_lblk = es1->es_lblk;
		es->es_len = es1->es_len;
		es->es_pblk = es1->es_pblk;
-		if (!ext4_es_is_referenced(es))
-			ext4_es_set_referenced(es);
+		if (!ext4_es_is_referenced(es1))
+			ext4_es_set_referenced(es1);
		stats->es_stats_cache_hits++;
	} else {
		stats->es_stats_cache_misses++;
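The tiny-looking change above fixes a real aliasing bug: `es` is the caller's copy of the entry while `es1` is the entry living in the extent-status tree, so setting the referenced bit on the copy never reached the cache. The pattern, reduced to standalone C (illustrative only):

#include <stdio.h>

struct entry { unsigned flags; };
#define REFERENCED 0x1u

int main(void)
{
	struct entry cached = { 0 };	/* stays in the cache, like es1 */
	struct entry copy = cached;	/* handed to the caller, like *es */

	copy.flags |= REFERENCED;	/* old code: marks the throwaway copy */
	printf("%u\n", cached.flags & REFERENCED);	/* 0 - the access is lost */

	cached.flags |= REFERENCED;	/* fixed code: marks the cached entry */
	printf("%u\n", cached.flags & REFERENCED);	/* 1 */
	return 0;
}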
fs/ext4/file.c (131 lines changed)

@@ -93,31 +93,29 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(iocb->ki_filp);
-	struct mutex *aio_mutex = NULL;
 	struct blk_plug plug;
 	int o_direct = iocb->ki_flags & IOCB_DIRECT;
+	int unaligned_aio = 0;
 	int overwrite = 0;
 	ssize_t ret;
 
-	/*
-	 * Unaligned direct AIO must be serialized; see comment above
-	 * In the case of O_APPEND, assume that we must always serialize
-	 */
-	if (o_direct &&
-	    ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
-	    !is_sync_kiocb(iocb) &&
-	    (iocb->ki_flags & IOCB_APPEND ||
-	     ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
-		aio_mutex = ext4_aio_mutex(inode);
-		mutex_lock(aio_mutex);
-		ext4_unwritten_wait(inode);
-	}
-
 	inode_lock(inode);
 	ret = generic_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
 
+	/*
+	 * Unaligned direct AIO must be serialized among each other as zeroing
+	 * of partial blocks of two competing unaligned AIOs can result in data
+	 * corruption.
+	 */
+	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+	    !is_sync_kiocb(iocb) &&
+	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
+		unaligned_aio = 1;
+		ext4_unwritten_wait(inode);
+	}
+
 	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.

@@ -139,7 +137,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
		blk_start_plug(&plug);
 
	/* check whether we do a DIO overwrite or not */
-	if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
+	if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
	    !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
		struct ext4_map_blocks map;
		unsigned int blkbits = inode->i_blkbits;

@@ -181,14 +179,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
	if (o_direct)
		blk_finish_plug(&plug);
 
-	if (aio_mutex)
-		mutex_unlock(aio_mutex);
	return ret;
 
out:
	inode_unlock(inode);
-	if (aio_mutex)
-		mutex_unlock(aio_mutex);
	return ret;
 }
 

@@ -417,7 +411,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 */
 static int ext4_find_unwritten_pgoff(struct inode *inode,
				     int whence,
-				     struct ext4_map_blocks *map,
+				     ext4_lblk_t end_blk,
				     loff_t *offset)
 {
	struct pagevec pvec;

@@ -432,7 +426,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
	blkbits = inode->i_sb->s_blocksize_bits;
	startoff = *offset;
	lastoff = startoff;
-	endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
+	endoff = (loff_t)end_blk << blkbits;
 
	index = startoff >> PAGE_CACHE_SHIFT;
	end = endoff >> PAGE_CACHE_SHIFT;
@@ -550,12 +544,11 @@ out:
 static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 {
	struct inode *inode = file->f_mapping->host;
-	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t dataoff, isize;
	int blkbits;
-	int ret = 0;
+	int ret;
 
	inode_lock(inode);
 

@@ -572,41 +565,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
	dataoff = offset;
 
	do {
-		map.m_lblk = last;
-		map.m_len = end - last + 1;
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-			if (last != start)
-				dataoff = (loff_t)last << blkbits;
-			break;
+		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+		if (ret <= 0) {
+			/* No extent found -> no data */
+			if (ret == 0)
+				ret = -ENXIO;
+			inode_unlock(inode);
+			return ret;
		}
 
-		/*
-		 * If there is a delay extent at this offset,
-		 * it will be as a data.
-		 */
-		ext4_es_find_delayed_extent_range(inode, last, last, &es);
-		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-			if (last != start)
-				dataoff = (loff_t)last << blkbits;
+		last = es.es_lblk;
+		if (last != start)
+			dataoff = (loff_t)last << blkbits;
+		if (!ext4_es_is_unwritten(&es))
			break;
-		}
 
		/*
		 * If there is a unwritten extent at this offset,
		 * it will be as a data or a hole according to page
		 * cache that has data or not.
		 */
-		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-			int unwritten;
-			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-							      &map, &dataoff);
-			if (unwritten)
-				break;
-		}
-
-		last++;
+		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+					      es.es_lblk + es.es_len, &dataoff))
+			break;
+		last += es.es_len;
		dataoff = (loff_t)last << blkbits;
+		cond_resched();
	} while (last <= end);
 
	inode_unlock(inode);
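The shape of the new walk, reduced to a sorted extent list in standalone C (illustrative only; the real loop also consults the page cache for unwritten extents, which this sketch omits, and ext4_get_next_extent()'s exact contract is inferred from the call sites):

#include <stdio.h>

struct toy_es { unsigned lblk, len; };	/* one extent: [lblk, lblk+len) */

/* first data block at or after `start`, or -1 if none up to `end` */
static long toy_seek_data(const struct toy_es *es, int n,
			  unsigned start, unsigned end)
{
	for (int i = 0; i < n; i++) {
		if (es[i].lblk + es[i].len <= start)
			continue;		/* wholly before the offset */
		if (es[i].lblk > end)
			break;			/* beyond the search window */
		return es[i].lblk > start ? es[i].lblk : start;
	}
	return -1;				/* no data: ext4 returns -ENXIO */
}

int main(void)
{
	struct toy_es es[] = { { 100, 50 }, { 400, 10 } };

	printf("%ld\n", toy_seek_data(es, 2, 0, 1000));	  /* 100 */
	printf("%ld\n", toy_seek_data(es, 2, 120, 1000)); /* 120 (inside extent) */
	printf("%ld\n", toy_seek_data(es, 2, 500, 1000)); /* -1 (no more data) */
	return 0;
}

One extent-status lookup per extent replaces the old code's one ext4_map_blocks() call per block (note the removed last++), which is where the speedup comes from.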
@@ -623,12 +607,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 {
	struct inode *inode = file->f_mapping->host;
-	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t holeoff, isize;
	int blkbits;
-	int ret = 0;
+	int ret;
 
	inode_lock(inode);
 

@@ -645,44 +628,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
	holeoff = offset;
 
	do {
-		map.m_lblk = last;
-		map.m_len = end - last + 1;
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-			last += ret;
-			holeoff = (loff_t)last << blkbits;
-			continue;
+		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+		if (ret < 0) {
+			inode_unlock(inode);
+			return ret;
		}
-
-		/*
-		 * If there is a delay extent at this offset,
-		 * we will skip this extent.
-		 */
-		ext4_es_find_delayed_extent_range(inode, last, last, &es);
-		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-			last = es.es_lblk + es.es_len;
-			holeoff = (loff_t)last << blkbits;
-			continue;
+		/* Found a hole? */
+		if (ret == 0 || es.es_lblk > last) {
+			if (last != start)
+				holeoff = (loff_t)last << blkbits;
+			break;
		}
-
		/*
		 * If there is a unwritten extent at this offset,
		 * it will be as a data or a hole according to page
		 * cache that has data or not.
		 */
-		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-			int unwritten;
-			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-							      &map, &holeoff);
-			if (!unwritten) {
-				last += ret;
-				holeoff = (loff_t)last << blkbits;
-				continue;
-			}
-		}
+		if (ext4_es_is_unwritten(&es) &&
+		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+					      last + es.es_len, &holeoff))
+			break;
 
-		/* find a hole */
-		break;
+		last += es.es_len;
+		holeoff = (loff_t)last << blkbits;
+		cond_resched();
	} while (last <= end);
 
	inode_unlock(inode);
fs/ext4/ialloc.c

@@ -787,7 +787,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
	sbi = EXT4_SB(sb);
 
	/*
-	 * Initalize owners and quota early so that we don't have to account
+	 * Initialize owners and quota early so that we don't have to account
	 * for quota initialization worst case in standard inode creating
	 * transaction
	 */
fs/ext4/indirect.c

@@ -555,8 +555,23 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
		goto got_it;
	}
 
-	/* Next simple case - plain lookup or failed read of indirect block */
-	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
+	/* Next simple case - plain lookup failed */
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+		unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
+		int i;
+
+		/* Count number blocks in a subtree under 'partial' */
+		count = 1;
+		for (i = 0; partial + i != chain + depth - 1; i++)
+			count *= epb;
+		/* Fill in size of a hole we found */
+		map->m_pblk = 0;
+		map->m_len = min_t(unsigned int, map->m_len, count);
+		goto cleanup;
+	}
+
+	/* Failed read of indirect block */
+	if (err == -EIO)
		goto cleanup;
 
	/*
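The subtree arithmetic above, worked out (illustrative only): with 4096-byte blocks each indirect block holds epb = 4096 / sizeof(u32) = 1024 block pointers, so a missing subtree k levels above the data covers epb^k blocks, and that whole range can now be reported as one hole instead of being probed block by block.

#include <stdio.h>

int main(void)
{
	unsigned long epb = 4096 / 4;		/* pointers per indirect block */
	unsigned long count = 1;

	for (int level = 0; level < 2; level++)	/* e.g. a missing double-indirect */
		count *= epb;
	printf("hole spans %lu blocks\n", count);	/* prints: 1048576 */
	return 0;
}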
@@ -693,21 +708,21 @@ retry:
	}
	if (IS_DAX(inode))
		ret = dax_do_io(iocb, inode, iter, offset,
-				ext4_get_block, NULL, 0);
+				ext4_dio_get_block, NULL, 0);
	else
		ret = __blockdev_direct_IO(iocb, inode,
					   inode->i_sb->s_bdev, iter,
-					   offset, ext4_get_block, NULL,
-					   NULL, 0);
+					   offset, ext4_dio_get_block,
+					   NULL, NULL, 0);
	inode_dio_end(inode);
 } else {
locked:
	if (IS_DAX(inode))
		ret = dax_do_io(iocb, inode, iter, offset,
-				ext4_get_block, NULL, DIO_LOCKING);
+				ext4_dio_get_block, NULL, DIO_LOCKING);
	else
		ret = blockdev_direct_IO(iocb, inode, iter, offset,
-					 ext4_get_block);
+					 ext4_dio_get_block);
 
	if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
		loff_t isize = i_size_read(inode);
@@ -581,9 +581,10 @@ retry:
 	if (ret)
 		goto out;
 
-	if (ext4_should_dioread_nolock(inode))
-		ret = __block_write_begin(page, from, to, ext4_get_block_write);
-	else
+	if (ext4_should_dioread_nolock(inode)) {
+		ret = __block_write_begin(page, from, to,
+					  ext4_get_block_unwritten);
+	} else
 		ret = __block_write_begin(page, from, to, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
@@ -1696,7 +1697,6 @@ int ext4_delete_inline_entry(handle_t *handle,
 	if (err)
 		goto out;
 
-	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 	err = ext4_mark_inode_dirty(handle, dir);
 	if (unlikely(err))
 		goto out;
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -216,7 +216,6 @@ void ext4_evict_inode(struct inode *inode)
 		}
 		truncate_inode_pages_final(&inode->i_data);
 
-		WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
 		goto no_delete;
 	}
 
@@ -228,8 +227,6 @@ void ext4_evict_inode(struct inode *inode)
 		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages_final(&inode->i_data);
 
-	WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
-
 	/*
 	 * Protect us against freezing - iput() caller didn't have to have any
 	 * protection against it
@@ -458,13 +455,13 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
  * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
  * based files
  *
- * On success, it returns the number of blocks being mapped or allocated.
- * if create==0 and the blocks are pre-allocated and unwritten block,
- * the result buffer head is unmapped. If the create ==1, it will make sure
- * the buffer head is mapped.
+ * On success, it returns the number of blocks being mapped or allocated. if
+ * create==0 and the blocks are pre-allocated and unwritten, the resulting @map
+ * is marked as unwritten. If the create == 1, it will mark @map as mapped.
  *
  * It returns 0 if plain look up failed (blocks have not been allocated), in
- * that case, buffer head is unmapped
+ * that case, @map is returned as unmapped but we still do fill map->m_len to
+ * indicate the length of a hole starting at map->m_lblk.
  *
  * It returns the error in case of allocation failure.
  */
@@ -507,6 +504,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			retval = map->m_len;
 			map->m_len = retval;
 		} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+			map->m_pblk = 0;
+			retval = es.es_len - (map->m_lblk - es.es_lblk);
+			if (retval > map->m_len)
+				retval = map->m_len;
+			map->m_len = retval;
 			retval = 0;
 		} else {
 			BUG_ON(1);
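With this change a lookup that hits a hole no longer leaves map->m_len meaningless: a zero return now carries the hole length. A hypothetical caller sketch (name invented, error handling elided):

	static ext4_lblk_t skip_hole(struct inode *inode, ext4_lblk_t lblk,
				     unsigned int len)
	{
		struct ext4_map_blocks map;
		int ret;

		map.m_lblk = lblk;
		map.m_len = len;
		ret = ext4_map_blocks(NULL, inode, &map, 0);	/* lookup only */
		if (ret == 0)
			return lblk + map.m_len;	/* first block after the hole */
		return lblk;				/* data (ret > 0) or error */
	}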
@@ -714,16 +716,11 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
 		 cmpxchg(&bh->b_state, old_state, new_state) != old_state));
 }
 
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
-
 static int _ext4_get_block(struct inode *inode, sector_t iblock,
 			   struct buffer_head *bh, int flags)
 {
-	handle_t *handle = ext4_journal_current_handle();
 	struct ext4_map_blocks map;
-	int ret = 0, started = 0;
-	int dio_credits;
+	int ret = 0;
 
 	if (ext4_has_inline_data(inode))
 		return -ERANGE;
@@ -731,33 +728,14 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 	map.m_lblk = iblock;
 	map.m_len = bh->b_size >> inode->i_blkbits;
 
-	if (flags && !handle) {
-		/* Direct IO write... */
-		if (map.m_len > DIO_MAX_BLOCKS)
-			map.m_len = DIO_MAX_BLOCKS;
-		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
-		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
-					    dio_credits);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			return ret;
-		}
-		started = 1;
-	}
-
-	ret = ext4_map_blocks(handle, inode, &map, flags);
+	ret = ext4_map_blocks(ext4_journal_current_handle(), inode, &map,
+			      flags);
 	if (ret > 0) {
-		ext4_io_end_t *io_end = ext4_inode_aio(inode);
-
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		ext4_update_bh_state(bh, map.m_flags);
-		if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
-			set_buffer_defer_completion(bh);
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
 	}
-	if (started)
-		ext4_journal_stop(handle);
 	return ret;
 }
@@ -768,6 +746,155 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
 			   create ? EXT4_GET_BLOCKS_CREATE : 0);
 }
 
+/*
+ * Get block function used when preparing for buffered write if we require
+ * creating an unwritten extent if blocks haven't been allocated.  The extent
+ * will be converted to written after the IO is complete.
+ */
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
+			     struct buffer_head *bh_result, int create)
+{
+	ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
+		   inode->i_ino, create);
+	return _ext4_get_block(inode, iblock, bh_result,
+			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
+}
+
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+
+static handle_t *start_dio_trans(struct inode *inode,
+				 struct buffer_head *bh_result)
+{
+	int dio_credits;
+
+	/* Trim mapping request to maximum we can map at once for DIO */
+	if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS)
+		bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits;
+	dio_credits = ext4_chunk_trans_blocks(inode,
+				      bh_result->b_size >> inode->i_blkbits);
+	return ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
+}
+
+/* Get block function for DIO reads and writes to inodes without extents */
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+		       struct buffer_head *bh, int create)
+{
+	handle_t *handle;
+	int ret;
+
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	if (create) {
+		handle = start_dio_trans(inode, bh);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+	}
+	ret = _ext4_get_block(inode, iblock, bh,
+			      create ? EXT4_GET_BLOCKS_CREATE : 0);
+	if (create)
+		ext4_journal_stop(handle);
+	return ret;
+}
+
+/*
+ * Get block function for AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete.
+ */
+static int ext4_dio_get_block_unwritten_async(struct inode *inode,
+		sector_t iblock, struct buffer_head *bh_result, int create)
+{
+	handle_t *handle;
+	int ret;
+
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	handle = start_dio_trans(inode, bh_result);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ret = _ext4_get_block(inode, iblock, bh_result,
+			      EXT4_GET_BLOCKS_IO_CREATE_EXT);
+	ext4_journal_stop(handle);
+
+	/*
+	 * When doing DIO using unwritten extents, we need io_end to convert
+	 * unwritten extents to written on IO completion. We allocate io_end
+	 * once we spot unwritten extent and store it in b_private. Generic
+	 * DIO code keeps b_private set and furthermore passes the value to
+	 * our completion callback in 'private' argument.
+	 */
+	if (!ret && buffer_unwritten(bh_result)) {
+		if (!bh_result->b_private) {
+			ext4_io_end_t *io_end;
+
+			io_end = ext4_init_io_end(inode, GFP_KERNEL);
+			if (!io_end)
+				return -ENOMEM;
+			bh_result->b_private = io_end;
+			ext4_set_io_unwritten_flag(inode, io_end);
+		}
+		set_buffer_defer_completion(bh_result);
+	}
+
+	return ret;
+}
+
+/*
+ * Get block function for non-AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete from ext4_ext_direct_IO() function.
+ */
+static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
+		sector_t iblock, struct buffer_head *bh_result, int create)
+{
+	handle_t *handle;
+	int ret;
+
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	handle = start_dio_trans(inode, bh_result);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ret = _ext4_get_block(inode, iblock, bh_result,
+			      EXT4_GET_BLOCKS_IO_CREATE_EXT);
+	ext4_journal_stop(handle);
+
+	/*
+	 * Mark inode as having pending DIO writes to unwritten extents.
+	 * ext4_ext_direct_IO() checks this flag and converts extents to
+	 * written.
+	 */
+	if (!ret && buffer_unwritten(bh_result))
+		ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+
+	return ret;
+}
+
+static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
+{
+	int ret;
+
+	ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n",
+		   inode->i_ino, create);
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	ret = _ext4_get_block(inode, iblock, bh_result, 0);
+	/*
+	 * Blocks should have been preallocated! ext4_file_write_iter() checks
+	 * that.
+	 */
+	WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result));
+
+	return ret;
+}
+
 /*
  * `handle' can be NULL if create is zero
  */
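All of the helpers added above implement the kernel's get_block_t contract: map "iblock" within "inode" into bh_result, shrink bh_result->b_size if less could be mapped at once, and allocate only when "create" is set. A minimal sketch of how generic direct IO drives such a callback (illustrative only; mirrors the ext4_ind_direct_IO() call shape shown earlier):

	static ssize_t dio_sketch(struct kiocb *iocb, struct inode *inode,
				  struct iov_iter *iter, loff_t offset)
	{
		/* generic DIO calls ext4_dio_get_block() repeatedly to map the range */
		return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
					    offset, ext4_dio_get_block, NULL, NULL, 0);
	}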
@@ -1079,13 +1206,14 @@ retry_journal:
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 	if (ext4_should_dioread_nolock(inode))
 		ret = ext4_block_write_begin(page, pos, len,
-					     ext4_get_block_write);
+					     ext4_get_block_unwritten);
 	else
 		ret = ext4_block_write_begin(page, pos, len,
 					     ext4_get_block);
 #else
 	if (ext4_should_dioread_nolock(inode))
-		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
+		ret = __block_write_begin(page, pos, len,
+					  ext4_get_block_unwritten);
 	else
 		ret = __block_write_begin(page, pos, len, ext4_get_block);
 #endif
@@ -3088,37 +3216,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 		return try_to_free_buffers(page);
 }
 
-/*
- * ext4_get_block used when preparing for a DIO write or buffer write.
- * We allocate an uinitialized extent if blocks haven't been allocated.
- * The extent will be converted to initialized after the IO is complete.
- */
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
-			 struct buffer_head *bh_result, int create)
-{
-	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
-		   inode->i_ino, create);
-	return _ext4_get_block(inode, iblock, bh_result,
-			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
-}
-
-static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh_result, int create)
-{
-	int ret;
-
-	ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
-		   inode->i_ino, create);
-	ret = _ext4_get_block(inode, iblock, bh_result, 0);
-	/*
-	 * Blocks should have been preallocated! ext4_file_write_iter() checks
-	 * that.
-	 */
-	WARN_ON_ONCE(!buffer_mapped(bh_result));
-
-	return ret;
-}
-
 #ifdef CONFIG_FS_DAX
 int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
 			    struct buffer_head *bh_result, int create)
@@ -3179,13 +3276,12 @@ out:
 	WARN_ON_ONCE(ret == 0 && create);
 	if (ret > 0) {
 		map_bh(bh_result, inode->i_sb, map.m_pblk);
-		bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
-					map.m_flags;
 		/*
 		 * At least for now we have to clear BH_New so that DAX code
 		 * doesn't attempt to zero blocks again in a racy way.
 		 */
-		bh_result->b_state &= ~(1 << BH_New);
+		map.m_flags &= ~EXT4_MAP_NEW;
+		ext4_update_bh_state(bh_result, map.m_flags);
 		bh_result->b_size = map.m_len << inode->i_blkbits;
 		ret = 0;
 	}
@@ -3196,7 +3292,7 @@ out:
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 			    ssize_t size, void *private)
 {
-	ext4_io_end_t *io_end = iocb->private;
+	ext4_io_end_t *io_end = private;
 
 	/* if not async direct IO just return */
 	if (!io_end)
@@ -3204,10 +3300,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
-		  iocb->private, io_end->inode->i_ino, iocb, offset,
-		  size);
+		  io_end, io_end->inode->i_ino, iocb, offset, size);
 
-	iocb->private = NULL;
 	io_end->offset = offset;
 	io_end->size = size;
 	ext4_put_io_end(io_end);
@@ -3243,7 +3337,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	get_block_t *get_block_func = NULL;
 	int dio_flags = 0;
 	loff_t final_size = offset + count;
-	ext4_io_end_t *io_end = NULL;
 
 	/* Use the old path for reads and writes beyond i_size. */
 	if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
@@ -3268,16 +3361,17 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	/*
 	 * We could direct write to holes and fallocate.
 	 *
-	 * Allocated blocks to fill the hole are marked as
-	 * unwritten to prevent parallel buffered read to expose
-	 * the stale data before DIO complete the data IO.
+	 * Allocated blocks to fill the hole are marked as unwritten to prevent
+	 * parallel buffered read to expose the stale data before DIO complete
+	 * the data IO.
 	 *
-	 * As to previously fallocated extents, ext4 get_block will
-	 * just simply mark the buffer mapped but still keep the
-	 * extents unwritten.
+	 * As to previously fallocated extents, ext4 get_block will just simply
+	 * mark the buffer mapped but still keep the extents unwritten.
 	 *
-	 * For non AIO case, we will convert those unwritten extents
-	 * to written after return back from blockdev_direct_IO.
+	 * For non AIO case, we will convert those unwritten extents to written
+	 * after return back from blockdev_direct_IO. That way we save us from
+	 * allocating io_end structure and also the overhead of offloading
+	 * the extent convertion to a workqueue.
 	 *
 	 * For async DIO, the conversion needs to be deferred when the
 	 * IO is completed. The ext4 end_io callback function will be
@@ -3285,30 +3379,13 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	 * case, we allocate an io_end structure to hook to the iocb.
 	 */
 	iocb->private = NULL;
-	if (overwrite) {
-		get_block_func = ext4_get_block_overwrite;
+	if (overwrite)
+		get_block_func = ext4_dio_get_block_overwrite;
+	else if (is_sync_kiocb(iocb)) {
+		get_block_func = ext4_dio_get_block_unwritten_sync;
+		dio_flags = DIO_LOCKING;
 	} else {
-		ext4_inode_aio_set(inode, NULL);
-		if (!is_sync_kiocb(iocb)) {
-			io_end = ext4_init_io_end(inode, GFP_NOFS);
-			if (!io_end) {
-				ret = -ENOMEM;
-				goto retake_lock;
-			}
-			/*
-			 * Grab reference for DIO. Will be dropped in
-			 * ext4_end_io_dio()
-			 */
-			iocb->private = ext4_get_io_end(io_end);
-			/*
-			 * we save the io structure for current async direct
-			 * IO, so that later ext4_map_blocks() could flag the
-			 * io structure whether there is a unwritten extents
-			 * needs to be converted when IO is completed.
-			 */
-			ext4_inode_aio_set(inode, io_end);
-		}
-		get_block_func = ext4_get_block_write;
+		get_block_func = ext4_dio_get_block_unwritten_async;
 		dio_flags = DIO_LOCKING;
 	}
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -3323,27 +3400,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 						   get_block_func,
 						   ext4_end_io_dio, NULL, dio_flags);
 
-	/*
-	 * Put our reference to io_end. This can free the io_end structure e.g.
-	 * in sync IO case or in case of error. It can even perform extent
-	 * conversion if all bios we submitted finished before we got here.
-	 * Note that in that case iocb->private can be already set to NULL
-	 * here.
-	 */
-	if (io_end) {
-		ext4_inode_aio_set(inode, NULL);
-		ext4_put_io_end(io_end);
-		/*
-		 * When no IO was submitted ext4_end_io_dio() was not
-		 * called so we have to put iocb's reference.
-		 */
-		if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
-			WARN_ON(iocb->private != io_end);
-			WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-			ext4_put_io_end(io_end);
-			iocb->private = NULL;
-		}
-	}
 	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
 						EXT4_STATE_DIO_UNWRITTEN)) {
 		int err;
@@ -3358,7 +3414,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 	}
 
-retake_lock:
 	if (iov_iter_rw(iter) == WRITE)
 		inode_dio_end(inode);
 	/* take i_mutex locking again if we do a ovewrite dio */
@@ -5261,6 +5316,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 	might_sleep();
 	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		return err;
 	if (ext4_handle_valid(handle) &&
 	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
 	    !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
@@ -5291,9 +5348,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 			}
 		}
 	}
-	if (!err)
-		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-	return err;
+	return ext4_mark_iloc_dirty(handle, inode, &iloc);
 }
 
 /*
@@ -5502,7 +5557,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unlock_page(page);
 	/* OK, we need to fill the hole... */
 	if (ext4_should_dioread_nolock(inode))
-		get_block = ext4_get_block_write;
+		get_block = ext4_get_block_unwritten;
 	else
 		get_block = ext4_get_block;
 retry_alloc:
@@ -5545,3 +5600,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	return err;
 }
+
+/*
+ * Find the first extent at or after @lblk in an inode that is not a hole.
+ * Search for @map_len blocks at most. The extent is returned in @result.
+ *
+ * The function returns 1 if we found an extent. The function returns 0 in
+ * case there is no extent at or after @lblk and in that case also sets
+ * @result->es_len to 0. In case of error, the error code is returned.
+ */
+int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+			 unsigned int map_len, struct extent_status *result)
+{
+	struct ext4_map_blocks map;
+	struct extent_status es = {};
+	int ret;
+
+	map.m_lblk = lblk;
+	map.m_len = map_len;
+
+	/*
+	 * For non-extent based files this loop may iterate several times since
+	 * we do not determine full hole size.
+	 */
+	while (map.m_len > 0) {
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret < 0)
+			return ret;
+		/* There's extent covering m_lblk? Just return it. */
+		if (ret > 0) {
+			int status;
+
+			ext4_es_store_pblock(result, map.m_pblk);
+			result->es_lblk = map.m_lblk;
+			result->es_len = map.m_len;
+			if (map.m_flags & EXT4_MAP_UNWRITTEN)
+				status = EXTENT_STATUS_UNWRITTEN;
+			else
+				status = EXTENT_STATUS_WRITTEN;
+			ext4_es_store_status(result, status);
+			return 1;
+		}
+		ext4_es_find_delayed_extent_range(inode, map.m_lblk,
+						  map.m_lblk + map.m_len - 1,
+						  &es);
+		/* Is delalloc data before next block in extent tree? */
+		if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
+			ext4_lblk_t offset = 0;
+
+			if (es.es_lblk < lblk)
+				offset = lblk - es.es_lblk;
+			result->es_lblk = es.es_lblk + offset;
+			ext4_es_store_pblock(result,
+					     ext4_es_pblock(&es) + offset);
+			result->es_len = es.es_len - offset;
+			ext4_es_store_status(result, ext4_es_status(&es));
+
+			return 1;
+		}
+		/* There's a hole at m_lblk, advance us after it */
+		map.m_lblk += map.m_len;
+		map_len -= map.m_len;
+		map.m_len = map_len;
+		cond_resched();
+	}
+	result->es_len = 0;
+	return 0;
+}
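A sketch of how a SEEK_DATA-style caller can use the new ext4_get_next_extent() helper (simplified, unlocked, and with an invented name; not code from this series):

	static loff_t next_data_sketch(struct inode *inode, loff_t offset)
	{
		struct extent_status es;
		int blkbits = inode->i_sb->s_blocksize_bits;
		ext4_lblk_t end = i_size_read(inode) >> blkbits;
		ext4_lblk_t last = offset >> blkbits;
		int ret = ext4_get_next_extent(inode, last, end - last + 1, &es);

		if (ret <= 0)
			return -ENXIO;		/* error or no more data */
		if (es.es_lblk > last)		/* data starts after a hole */
			return (loff_t)es.es_lblk << blkbits;
		return offset;			/* already sitting on data */
	}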
@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
 */
@@ -815,7 +815,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
 * for this page; do not hold this lock when calling this routine!
 */
 
-static int ext4_mb_init_cache(struct page *page, char *incore)
+static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
 {
 	ext4_group_t ngroups;
 	int blocksize;
@@ -848,7 +848,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 	/* allocate buffer_heads to read bitmaps */
 	if (groups_per_page > 1) {
 		i = sizeof(struct buffer_head *) * groups_per_page;
-		bh = kzalloc(i, GFP_NOFS);
+		bh = kzalloc(i, gfp);
 		if (bh == NULL) {
 			err = -ENOMEM;
 			goto out;
@@ -983,7 +983,7 @@ out:
 * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
 */
 static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
-		ext4_group_t group, struct ext4_buddy *e4b)
+		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
 {
 	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
 	int block, pnum, poff;
@@ -1002,7 +1002,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 	block = group * 2;
 	pnum = block / blocks_per_page;
 	poff = block % blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1016,7 +1016,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 
 	block++;
 	pnum = block / blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1042,7 +1042,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
 * calling this routine!
 */
 static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
 {
 
 	struct ext4_group_info *this_grp;
@@ -1062,7 +1062,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	 * The call to ext4_mb_get_buddy_page_lock will mark the
 	 * page accessed.
 	 */
-	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
 	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
 		/*
 		 * somebody initialized the group
@@ -1072,7 +1072,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 
 	page = e4b.bd_bitmap_page;
-	ret = ext4_mb_init_cache(page, NULL);
+	ret = ext4_mb_init_cache(page, NULL, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1091,7 +1091,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 	/* init buddy cache */
 	page = e4b.bd_buddy_page;
-	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1109,8 +1109,8 @@ err:
 * calling this routine!
 */
 static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-		   struct ext4_buddy *e4b)
+ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+		       struct ext4_buddy *e4b, gfp_t gfp)
 {
 	int blocks_per_page;
 	int block;
@@ -1140,7 +1140,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		 * we need full data about the group
 		 * to make a good selection
 		 */
-		ret = ext4_mb_init_group(sb, group);
+		ret = ext4_mb_init_group(sb, group, gfp);
 		if (ret)
 			return ret;
 	}
@@ -1168,11 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		 * wait for it to initialize.
 		 */
 		page_cache_release(page);
-		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ret = ext4_mb_init_cache(page, NULL);
+				ret = ext4_mb_init_cache(page, NULL, gfp);
 				if (ret) {
 					unlock_page(page);
 					goto err;
@@ -1204,11 +1204,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
 			page_cache_release(page);
-		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+							 gfp);
 				if (ret) {
 					unlock_page(page);
 					goto err;
@@ -1247,6 +1248,12 @@ err:
 	return ret;
 }
 
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+			      struct ext4_buddy *e4b)
+{
+	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+}
+
 static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
 	if (e4b->bd_bitmap_page)
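The wrapper above keeps every existing ext4_mb_load_buddy() call site unchanged while new callers choose their own gfp mask. The same refactoring shape in miniature (hypothetical names; a sketch, not from the patch):

	static int do_work_gfp(int arg, gfp_t gfp)
	{
		void *buf = kmalloc(64, gfp);	/* caller-chosen allocation mode */

		if (!buf)
			return -ENOMEM;
		/* ... use buf ... */
		kfree(buf);
		return 0;
	}

	static int do_work(int arg)
	{
		return do_work_gfp(arg, GFP_NOFS);	/* old default behaviour */
	}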
@@ -2045,7 +2052,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 
 	/* We only do this if the grp has never been initialized */
 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-		int ret = ext4_mb_init_group(ac->ac_sb, group);
+		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
 		if (ret)
 			return ret;
 	}
@@ -4694,16 +4701,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			    inode, bh, block);
 	}
 
-	/*
-	 * We need to make sure we don't reuse the freed block until
-	 * after the transaction is committed, which we can do by
-	 * treating the block as metadata, below.  We make an
-	 * exception if the inode is to be written in writeback mode
-	 * since writeback mode has weak data consistency guarantees.
-	 */
-	if (!ext4_should_writeback_data(inode))
-		flags |= EXT4_FREE_BLOCKS_METADATA;
-
 	/*
 	 * If the extent to be freed does not begin on a cluster
 	 * boundary, we need to deal with partial clusters at the
@@ -4738,14 +4735,13 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
 	if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
 		int i;
+		int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
 
 		for (i = 0; i < count; i++) {
 			cond_resched();
-			bh = sb_find_get_block(inode->i_sb, block + i);
-			if (!bh)
-				continue;
-			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
-				    inode, bh, block + i);
+			if (is_metadata)
+				bh = sb_find_get_block(inode->i_sb, block + i);
+			ext4_forget(handle, is_metadata, inode, bh, block + i);
 		}
 	}
 
@@ -4815,16 +4811,23 @@ do_more:
 #endif
 	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
 
-	err = ext4_mb_load_buddy(sb, block_group, &e4b);
+	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+	err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+				     GFP_NOFS|__GFP_NOFAIL);
 	if (err)
 		goto error_return;
 
-	if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
+	/*
+	 * We need to make sure we don't reuse the freed block until after the
+	 * transaction is committed. We make an exception if the inode is to be
+	 * written in writeback mode since writeback mode has weak data
+	 * consistency guarantees.
+	 */
+	if (ext4_handle_valid(handle) &&
+	    ((flags & EXT4_FREE_BLOCKS_METADATA) ||
+	     !ext4_should_writeback_data(inode))) {
 		struct ext4_free_data *new_entry;
 		/*
-		 * blocks being freed are metadata. these blocks shouldn't
-		 * be used until this transaction is committed
-		 *
 		 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
 		 * to fail.
 		 */
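A sketch of what the __GFP_NOFAIL annotation buys (illustrative; ext4_free_data_cachep is the cache this function already allocates from elsewhere): an allocation that could return NULL becomes one that loops until it succeeds, which suits ext4_free_blocks() since it has no sane failure path.

	static struct ext4_free_data *alloc_free_data_sketch(void)
	{
		/* never returns NULL; may block arbitrarily long under pressure */
		return kmem_cache_alloc(ext4_free_data_cachep,
					GFP_NOFS | __GFP_NOFAIL);
	}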
@@ -5217,7 +5220,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		grp = ext4_get_group_info(sb, group);
 		/* We only do this if the grp has never been initialized */
 		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-			ret = ext4_mb_init_group(sb, group);
+			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
 			if (ret)
 				break;
 		}
@@ -22,18 +22,6 @@
 #include "ext4_jbd2.h"
 #include "ext4.h"
 
-/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
 /*
  */
 #ifdef CONFIG_EXT4_DEBUG
@@ -361,7 +361,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 	 * blocks.
 	 *
 	 * While converting to extents we need not
-	 * update the orignal inode i_blocks for extent blocks
+	 * update the original inode i_blocks for extent blocks
 	 * via quota APIs. The quota update happened via tmp_inode already.
 	 */
 	spin_lock(&inode->i_lock);
@@ -91,21 +91,22 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
 	submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
 	wait_on_buffer(*bh);
 	if (!buffer_uptodate(*bh)) {
-		brelse(*bh);
-		*bh = NULL;
 		ret = -EIO;
 		goto warn_exit;
 	}
 
 	mmp = (struct mmp_struct *)((*bh)->b_data);
-	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
+	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
 		ret = -EFSCORRUPTED;
-	else if (!ext4_mmp_csum_verify(sb, mmp))
+		goto warn_exit;
+	}
+	if (!ext4_mmp_csum_verify(sb, mmp)) {
 		ret = -EFSBADCRC;
-	else
-		return 0;
-
+		goto warn_exit;
+	}
+	return 0;
 warn_exit:
+	brelse(*bh);
+	*bh = NULL;
 	ext4_warning(sb, "Error %d while reading MMP block %llu",
 		     ret, mmp_block);
 	return ret;
@@ -181,15 +182,13 @@ static int kmmpd(void *data)
 			     EXT4_FEATURE_INCOMPAT_MMP)) {
 			ext4_warning(sb, "kmmpd being stopped since MMP feature"
 				     " has been disabled.");
-			EXT4_SB(sb)->s_mmp_tsk = NULL;
-			goto failed;
+			goto exit_thread;
 		}
 
 		if (sb->s_flags & MS_RDONLY) {
 			ext4_warning(sb, "kmmpd being stopped since filesystem "
 				     "has been remounted as readonly.");
-			EXT4_SB(sb)->s_mmp_tsk = NULL;
-			goto failed;
+			goto exit_thread;
 		}
 
 		diff = jiffies - last_update_time;
@@ -211,9 +210,7 @@ static int kmmpd(void *data)
 			if (retval) {
 				ext4_error(sb, "error reading MMP data: %d",
 					   retval);
-
-				EXT4_SB(sb)->s_mmp_tsk = NULL;
-				goto failed;
+				goto exit_thread;
 			}
 
 			mmp_check = (struct mmp_struct *)(bh_check->b_data);
@@ -225,7 +222,9 @@ static int kmmpd(void *data)
 					     "The filesystem seems to have been"
 					     " multiply mounted.");
 				ext4_error(sb, "abort");
-				goto failed;
+				put_bh(bh_check);
+				retval = -EBUSY;
+				goto exit_thread;
 			}
 			put_bh(bh_check);
 		}
@@ -248,7 +247,8 @@ static int kmmpd(void *data)
 
 	retval = write_mmp_block(sb, bh);
 
-failed:
+exit_thread:
+	EXT4_SB(sb)->s_mmp_tsk = NULL;
 	kfree(data);
 	brelse(bh);
 	return retval;
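The kmmpd() rework above is a classic single-exit cleanup: every bail-out funnels through one label that resets s_mmp_tsk exactly once, instead of repeating the reset before each goto. The shape in miniature (hypothetical example, not from the patch):

	static int single_exit_sketch(void)
	{
		char *buf = NULL;
		int retval;

		buf = kmalloc(64, GFP_KERNEL);
		if (!buf) {
			retval = -ENOMEM;
			goto exit_fn;
		}
		retval = 0;
		/* ... further steps just set retval and jump ... */
	exit_fn:
		kfree(buf);	/* kfree(NULL) is a no-op, so always safe */
		return retval;
	}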
@@ -128,9 +128,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
 	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
 	WARN_ON(io_end->handle);
 
-	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
-		wake_up_all(ext4_ioend_wq(io_end->inode));
-
 	for (bio = io_end->bio; bio; bio = next_bio) {
 		next_bio = bio->bi_private;
 		ext4_finish_bio(bio);
@@ -265,7 +262,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
 	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
 	if (io) {
-		atomic_inc(&EXT4_I(inode)->i_ioend_count);
 		io->inode = inode;
 		INIT_LIST_HEAD(&io->list);
 		atomic_set(&io->count, 1);
@@ -55,7 +55,6 @@
 
 static struct ext4_lazy_init *ext4_li_info;
 static struct mutex ext4_li_mtx;
-static int ext4_mballoc_ready;
 static struct ratelimit_state ext4_mount_msg_ratelimit;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
@@ -844,7 +843,6 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
-	ext4_xattr_put_super(sb);
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		ext4_clear_feature_journal_needs_recovery(sb);
@@ -944,7 +942,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	spin_lock_init(&ei->i_completed_io_lock);
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
-	atomic_set(&ei->i_ioend_count, 0);
 	atomic_set(&ei->i_unwritten, 0);
 	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -1425,9 +1422,9 @@ static const struct mount_opts {
 	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
 	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
 	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
-	 MOPT_NO_EXT2 | MOPT_SET},
+	 MOPT_NO_EXT2},
 	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
-	 MOPT_NO_EXT2 | MOPT_CLEAR},
+	 MOPT_NO_EXT2},
 	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
 	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
 	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
@@ -1705,6 +1702,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
 			ext4_msg(sb, KERN_INFO, "dax option not supported");
 			return -1;
 #endif
+	} else if (token == Opt_data_err_abort) {
+		sbi->s_mount_opt |= m->mount_opt;
+	} else if (token == Opt_data_err_ignore) {
+		sbi->s_mount_opt &= ~m->mount_opt;
 	} else {
 		if (!args->from)
 			arg = 1;
@@ -1914,6 +1915,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
 	if (nodefs || sbi->s_max_dir_size_kb)
 		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
+	if (test_opt(sb, DATA_ERR_ABORT))
+		SEQ_OPTS_PUTS("data_err=abort");
 
 	ext4_show_quota_options(seq, sb);
 	return 0;
@@ -3796,12 +3799,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
 
 no_journal:
-	if (ext4_mballoc_ready) {
-		sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
-		if (!sbi->s_mb_cache) {
-			ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
-			goto failed_mount_wq;
-		}
+	sbi->s_mb_cache = ext4_xattr_create_cache();
+	if (!sbi->s_mb_cache) {
+		ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+		goto failed_mount_wq;
 	}
 
 	if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4027,6 +4028,10 @@ failed_mount4:
 	if (EXT4_SB(sb)->rsv_conversion_wq)
 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
 failed_mount_wq:
+	if (sbi->s_mb_cache) {
+		ext4_xattr_destroy_cache(sbi->s_mb_cache);
+		sbi->s_mb_cache = NULL;
+	}
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
@@ -5321,7 +5326,6 @@ MODULE_ALIAS_FS("ext4");
 
 /* Shared across all ext4 file systems */
 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 
 static int __init ext4_init_fs(void)
 {
@@ -5334,10 +5338,8 @@ static int __init ext4_init_fs(void)
 	/* Build-time check for flags consistency */
 	ext4_check_flag_values();
 
-	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
-		mutex_init(&ext4__aio_mutex[i]);
+	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
 		init_waitqueue_head(&ext4__ioend_wq[i]);
-	}
 
 	err = ext4_init_es();
 	if (err)
@@ -5358,8 +5360,6 @@ static int __init ext4_init_fs(void)
 	err = ext4_init_mballoc();
 	if (err)
 		goto out2;
-	else
-		ext4_mballoc_ready = 1;
 	err = init_inodecache();
 	if (err)
 		goto out1;
@@ -5375,7 +5375,6 @@ out:
 	unregister_as_ext3();
 	destroy_inodecache();
 out1:
-	ext4_mballoc_ready = 0;
 	ext4_exit_mballoc();
out2:
 	ext4_exit_sysfs();
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -545,30 +545,44 @@ static void
 ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 			 struct buffer_head *bh)
 {
-	struct mb_cache_entry *ce = NULL;
-	int error = 0;
 	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+	u32 hash, ref;
+	int error = 0;
 
-	ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
 	BUFFER_TRACE(bh, "get_write_access");
 	error = ext4_journal_get_write_access(handle, bh);
 	if (error)
 		goto out;
 
 	lock_buffer(bh);
-	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+	hash = le32_to_cpu(BHDR(bh)->h_hash);
+	ref = le32_to_cpu(BHDR(bh)->h_refcount);
+	if (ref == 1) {
 		ea_bdebug(bh, "refcount now=0; freeing");
-		if (ce)
-			mb_cache_entry_free(ce);
+		/*
+		 * This must happen under buffer lock for
+		 * ext4_xattr_block_set() to reliably detect freed block
+		 */
+		mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
 		get_bh(bh);
 		unlock_buffer(bh);
 		ext4_free_blocks(handle, inode, bh, 0, 1,
 				 EXT4_FREE_BLOCKS_METADATA |
 				 EXT4_FREE_BLOCKS_FORGET);
 	} else {
-		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
-		if (ce)
-			mb_cache_entry_release(ce);
+		ref--;
+		BHDR(bh)->h_refcount = cpu_to_le32(ref);
+		if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
+			struct mb_cache_entry *ce;
+
+			ce = mb_cache_entry_get(ext4_mb_cache, hash,
+						bh->b_blocknr);
+			if (ce) {
+				ce->e_reusable = 1;
+				mb_cache_entry_put(ext4_mb_cache, ce);
+			}
+		}
+
 		/*
 		 * Beware of this ugliness: Releasing of xattr block references
 		 * from different inodes can race and so we have to protect
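A sketch of the new mbcache get/put discipline used throughout these xattr changes (illustrative, with an invented helper name): entries are looked up by (hash, block), marked reusable only while the on-disk refcount leaves room, and every get is balanced by a put.

	static void mark_reusable_sketch(struct mb_cache *cache, u32 hash,
					 sector_t block)
	{
		struct mb_cache_entry *ce;

		ce = mb_cache_entry_get(cache, hash, block);	/* takes a reference */
		if (ce) {
			ce->e_reusable = 1;		/* other inodes may now share it */
			mb_cache_entry_put(cache, ce);	/* drop our reference */
		}
	}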
||||||
@@ -790,8 +804,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
         if (i->value && i->value_len > sb->s_blocksize)
                 return -ENOSPC;
         if (s->base) {
-                ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
-                                        bs->bh->b_blocknr);
                 BUFFER_TRACE(bs->bh, "get_write_access");
                 error = ext4_journal_get_write_access(handle, bs->bh);
                 if (error)
@@ -799,10 +811,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                 lock_buffer(bs->bh);
 
                 if (header(s->base)->h_refcount == cpu_to_le32(1)) {
-                        if (ce) {
-                                mb_cache_entry_free(ce);
-                                ce = NULL;
-                        }
+                        __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
+
+                        /*
+                         * This must happen under buffer lock for
+                         * ext4_xattr_block_set() to reliably detect modified
+                         * block
+                         */
+                        mb_cache_entry_delete_block(ext4_mb_cache, hash,
+                                                    bs->bh->b_blocknr);
                         ea_bdebug(bs->bh, "modifying in-place");
                         error = ext4_xattr_set_entry(i, s);
                         if (!error) {
@@ -826,10 +843,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
                         int offset = (char *)s->here - bs->bh->b_data;
 
                         unlock_buffer(bs->bh);
-                        if (ce) {
-                                mb_cache_entry_release(ce);
-                                ce = NULL;
-                        }
                         ea_bdebug(bs->bh, "cloning");
                         s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
                         error = -ENOMEM;
@@ -872,6 +885,8 @@ inserted:
                         if (new_bh == bs->bh)
                                 ea_bdebug(new_bh, "keeping");
                         else {
+                                u32 ref;
+
                                 /* The old block is released after updating
                                    the inode. */
                                 error = dquot_alloc_block(inode,
@@ -884,9 +899,40 @@ inserted:
                                 if (error)
                                         goto cleanup_dquot;
                                 lock_buffer(new_bh);
-                                le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
+                                /*
+                                 * We have to be careful about races with
+                                 * freeing, rehashing or adding references to
+                                 * xattr block. Once we hold buffer lock xattr
+                                 * block's state is stable so we can check
+                                 * whether the block got freed / rehashed or
+                                 * not. Since we unhash mbcache entry under
+                                 * buffer lock when freeing / rehashing xattr
+                                 * block, checking whether entry is still
+                                 * hashed is reliable. Same rules hold for
+                                 * e_reusable handling.
+                                 */
+                                if (hlist_bl_unhashed(&ce->e_hash_list) ||
+                                    !ce->e_reusable) {
+                                        /*
+                                         * Undo everything and check mbcache
+                                         * again.
+                                         */
+                                        unlock_buffer(new_bh);
+                                        dquot_free_block(inode,
+                                                         EXT4_C2B(EXT4_SB(sb),
+                                                                  1));
+                                        brelse(new_bh);
+                                        mb_cache_entry_put(ext4_mb_cache, ce);
+                                        ce = NULL;
+                                        new_bh = NULL;
+                                        goto inserted;
+                                }
+                                ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+                                BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+                                if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+                                        ce->e_reusable = 0;
                                 ea_bdebug(new_bh, "reusing; refcount now=%d",
-                                          le32_to_cpu(BHDR(new_bh)->h_refcount));
+                                          ref);
                                 unlock_buffer(new_bh);
                                 error = ext4_handle_dirty_xattr_block(handle,
                                                                       inode,
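The reuse path above follows a look-up-unlocked, revalidate-under-lock pattern: the mbcache entry is found without the buffer lock held, and only after lock_buffer() does the code confirm that the entry is still hashed and still reusable, undoing everything and retrying otherwise. A toy userspace sketch of the same pattern, with a mutex standing in for the buffer lock; all names here are illustrative, not the kernel API:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct block {
            pthread_mutex_t lock;
            bool hashed;      /* still present in the cache hash? */
            bool reusable;    /* refcount below the sharing limit? */
            unsigned int ref;
    };

    static bool try_reuse(struct block *b)
    {
            pthread_mutex_lock(&b->lock);
            /* State is stable under the lock: a concurrent free or rehash
             * would have unhashed the entry before we got here. */
            if (!b->hashed || !b->reusable) {
                    pthread_mutex_unlock(&b->lock);
                    return false;   /* caller undoes its work and retries the lookup */
            }
            b->ref++;
            pthread_mutex_unlock(&b->lock);
            return true;
    }

    int main(void)
    {
            struct block b = { PTHREAD_MUTEX_INITIALIZER, true, true, 1 };

            printf("reused: %d, ref now %u\n", try_reuse(&b), b.ref);
            return 0;
    }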
@@ -894,7 +940,8 @@ inserted:
                                 if (error)
                                         goto cleanup_dquot;
                         }
-                        mb_cache_entry_release(ce);
+                        mb_cache_entry_touch(ext4_mb_cache, ce);
+                        mb_cache_entry_put(ext4_mb_cache, ce);
                         ce = NULL;
                 } else if (bs->bh && s->base == bs->bh->b_data) {
                         /* We were modifying this block in-place. */
@@ -959,7 +1006,7 @@ getblk_failed:
 
 cleanup:
         if (ce)
-                mb_cache_entry_release(ce);
+                mb_cache_entry_put(ext4_mb_cache, ce);
         brelse(new_bh);
         if (!(bs->bh && s->base == bs->bh->b_data))
                 kfree(s->base);
@@ -1070,6 +1117,17 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
         return 0;
 }
 
+static int ext4_xattr_value_same(struct ext4_xattr_search *s,
+                                 struct ext4_xattr_info *i)
+{
+        void *value;
+
+        if (le32_to_cpu(s->here->e_value_size) != i->value_len)
+                return 0;
+        value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
+        return !memcmp(value, i->value, i->value_len);
+}
+
 /*
  * ext4_xattr_set_handle()
  *
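ext4_xattr_value_same(), added above, lets ext4_xattr_set_handle() skip the whole journalled update when the value being set is byte-identical to what is already stored. The check itself is nothing more than a length compare plus memcmp, as this userspace model shows (illustrative, not the kernel code):

    #include <stdio.h>
    #include <string.h>

    /* Return nonzero when the stored value and the new value match exactly,
     * so the caller can bail out before starting any real work. */
    static int value_same(const void *stored, size_t stored_len,
                          const void *new_val, size_t new_len)
    {
            if (stored_len != new_len)
                    return 0;
            return !memcmp(stored, new_val, new_len);
    }

    int main(void)
    {
            const char cur[] = "ctx=system_u";

            printf("%d\n", value_same(cur, sizeof(cur), "ctx=system_u", sizeof(cur)));
            return 0;
    }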
@@ -1146,6 +1204,13 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
                 else if (!bs.s.not_found)
                         error = ext4_xattr_block_set(handle, inode, &i, &bs);
         } else {
+                error = 0;
+                /* Xattr value did not change? Save us some work and bail out */
+                if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
+                        goto cleanup;
+                if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
+                        goto cleanup;
+
                 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
                 if (!error && !bs.s.not_found) {
                         i.value = NULL;
@@ -1511,17 +1576,6 @@ cleanup:
         brelse(bh);
 }
 
-/*
- * ext4_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext4_xattr_put_super(struct super_block *sb)
-{
-        mb_cache_shrink(sb->s_bdev);
-}
-
 /*
  * ext4_xattr_cache_insert()
  *
@@ -1533,26 +1587,19 @@ ext4_xattr_put_super(struct super_block *sb)
 static void
 ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
 {
-        __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
-        struct mb_cache_entry *ce;
+        struct ext4_xattr_header *header = BHDR(bh);
+        __u32 hash = le32_to_cpu(header->h_hash);
+        int reusable = le32_to_cpu(header->h_refcount) <
+                       EXT4_XATTR_REFCOUNT_MAX;
         int error;
 
-        ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
-        if (!ce) {
-                ea_bdebug(bh, "out of memory");
-                return;
-        }
-        error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+        error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+                                      bh->b_blocknr, reusable);
         if (error) {
-                mb_cache_entry_free(ce);
-                if (error == -EBUSY) {
+                if (error == -EBUSY)
                         ea_bdebug(bh, "already in cache");
-                        error = 0;
-                }
-        } else {
+        } else
                 ea_bdebug(bh, "inserting [%x]", (int)hash);
-                mb_cache_entry_release(ce);
-        }
 }
 
 /*
@@ -1614,33 +1661,20 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
         if (!header->h_hash)
                 return NULL;  /* never share */
         ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
-        ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
-                                       hash);
+        ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
         while (ce) {
                 struct buffer_head *bh;
 
-                if (IS_ERR(ce)) {
-                        if (PTR_ERR(ce) == -EAGAIN)
-                                goto again;
-                        break;
-                }
                 bh = sb_bread(inode->i_sb, ce->e_block);
                 if (!bh) {
                         EXT4_ERROR_INODE(inode, "block %lu read error",
                                          (unsigned long) ce->e_block);
-                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
-                                EXT4_XATTR_REFCOUNT_MAX) {
-                        ea_idebug(inode, "block %lu refcount %d>=%d",
-                                  (unsigned long) ce->e_block,
-                                  le32_to_cpu(BHDR(bh)->h_refcount),
-                                  EXT4_XATTR_REFCOUNT_MAX);
                 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
                         *pce = ce;
                         return bh;
                 }
                 brelse(bh);
-                ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+                ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
         }
         return NULL;
 }
@@ -1716,9 +1750,9 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
 #define HASH_BUCKET_BITS        10
 
 struct mb_cache *
-ext4_xattr_create_cache(char *name)
+ext4_xattr_create_cache(void)
 {
-        return mb_cache_create(name, HASH_BUCKET_BITS);
+        return mb_cache_create(HASH_BUCKET_BITS);
 }
 
 void ext4_xattr_destroy_cache(struct mb_cache *cache)
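With each filesystem now owning its cache for its lifetime, the cache no longer needs a global name or a block-device based shrink hook, so ext4_xattr_create_cache() loses its argument. A rough userspace sketch of a constructor with that shape, sized as 1 << bucket_bits hash buckets (toy types, not fs/mbcache.c):

    #include <stdlib.h>

    struct bucket { void *first; };

    struct cache {
            int bucket_bits;
            struct bucket *buckets;    /* 1 << bucket_bits hash chains */
    };

    static struct cache *cache_create(int bucket_bits)
    {
            struct cache *c = calloc(1, sizeof(*c));

            if (!c)
                    return NULL;
            c->bucket_bits = bucket_bits;
            c->buckets = calloc(1UL << bucket_bits, sizeof(*c->buckets));
            if (!c->buckets) {
                    free(c);
                    return NULL;
            }
            return c;
    }

    int main(void)
    {
            struct cache *c = cache_create(10);    /* mirrors HASH_BUCKET_BITS */

            if (c) {
                    free(c->buckets);
                    free(c);
            }
            return 0;
    }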
fs/ext4/xattr.h

@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
 extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
 
 extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext4_xattr_put_super(struct super_block *);
 
 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
                                       struct ext4_inode *raw_inode, handle_t *handle);
@@ -124,7 +123,7 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
                                        struct ext4_xattr_info *i,
                                        struct ext4_xattr_ibody_find *is);
 
-extern struct mb_cache *ext4_xattr_create_cache(char *name);
+extern struct mb_cache *ext4_xattr_create_cache(void);
 extern void ext4_xattr_destroy_cache(struct mb_cache *);
 
 #ifdef CONFIG_EXT4_FS_SECURITY
fs/jbd2/commit.c

@@ -131,14 +131,12 @@ static int journal_submit_commit_record(journal_t *journal,
         if (is_journal_aborted(journal))
                 return 0;
 
-        bh = jbd2_journal_get_descriptor_buffer(journal);
+        bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
+                                                JBD2_COMMIT_BLOCK);
         if (!bh)
                 return 1;
 
         tmp = (struct commit_header *)bh->b_data;
-        tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
-        tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
-        tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
         tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
         tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
 
@@ -222,7 +220,7 @@ static int journal_submit_data_buffers(journal_t *journal,
         spin_lock(&journal->j_list_lock);
         list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
                 mapping = jinode->i_vfs_inode->i_mapping;
-                set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+                jinode->i_flags |= JI_COMMIT_RUNNING;
                 spin_unlock(&journal->j_list_lock);
                 /*
                  * submit the inode data buffers. We use writepage
@@ -236,8 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
                         ret = err;
                 spin_lock(&journal->j_list_lock);
                 J_ASSERT(jinode->i_transaction == commit_transaction);
-                clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
-                smp_mb__after_atomic();
+                jinode->i_flags &= ~JI_COMMIT_RUNNING;
+                smp_mb();
                 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
         }
         spin_unlock(&journal->j_list_lock);
@@ -258,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
         /* For locking, see the comment in journal_submit_data_buffers() */
         spin_lock(&journal->j_list_lock);
         list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
-                set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+                jinode->i_flags |= JI_COMMIT_RUNNING;
                 spin_unlock(&journal->j_list_lock);
                 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
                 if (err) {
@@ -274,8 +272,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
                                 ret = err;
                 }
                 spin_lock(&journal->j_list_lock);
-                clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
-                smp_mb__after_atomic();
+                jinode->i_flags &= ~JI_COMMIT_RUNNING;
+                smp_mb();
                 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
         }
 
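The four hunks above drop the atomic set_bit()/clear_bit() on __JI_COMMIT_RUNNING: every writer of i_flags now runs under j_list_lock, so plain bit operations suffice, with one explicit memory barrier kept before wake_up_bit(). A userspace analogue with a mutex in place of the spinlock; illustrative only, not the kernel implementation:

    #include <pthread.h>

    #define COMMIT_RUNNING 0x1

    struct jinode {
            unsigned long flags;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void mark_commit_running(struct jinode *ji)
    {
            pthread_mutex_lock(&list_lock);
            ji->flags |= COMMIT_RUNNING;    /* plain store is safe under the lock */
            pthread_mutex_unlock(&list_lock);
    }

    static void clear_commit_running(struct jinode *ji)
    {
            pthread_mutex_lock(&list_lock);
            ji->flags &= ~COMMIT_RUNNING;
            __sync_synchronize();           /* stand-in for smp_mb() before the wakeup */
            pthread_mutex_unlock(&list_lock);
    }

    int main(void)
    {
            struct jinode ji = { 0 };

            mark_commit_running(&ji);
            clear_commit_running(&ji);
            return 0;
    }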
@@ -319,22 +317,6 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
         tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
 }
 
-static void jbd2_descr_block_csum_set(journal_t *j,
-                                      struct buffer_head *bh)
-{
-        struct jbd2_journal_block_tail *tail;
-        __u32 csum;
-
-        if (!jbd2_journal_has_csum_v2or3(j))
-                return;
-
-        tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
-                        sizeof(struct jbd2_journal_block_tail));
-        tail->t_checksum = 0;
-        csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
-        tail->t_checksum = cpu_to_be32(csum);
-}
-
 static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
                                     struct buffer_head *bh, __u32 sequence)
 {
@@ -379,7 +361,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         ktime_t start_time;
         u64 commit_time;
         char *tagp = NULL;
-        journal_header_t *header;
         journal_block_tag_t *tag = NULL;
         int space_left = 0;
         int first_tag = 0;
@@ -554,8 +535,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                 jbd2_journal_abort(journal, err);
 
         blk_start_plug(&plug);
-        jbd2_journal_write_revoke_records(journal, commit_transaction,
-                                          &log_bufs, WRITE_SYNC);
+        jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
 
         jbd_debug(3, "JBD2: commit phase 2b\n");
 
@@ -616,7 +596,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
                         jbd_debug(4, "JBD2: get descriptor\n");
 
-                        descriptor = jbd2_journal_get_descriptor_buffer(journal);
+                        descriptor = jbd2_journal_get_descriptor_buffer(
+                                                        commit_transaction,
+                                                        JBD2_DESCRIPTOR_BLOCK);
                         if (!descriptor) {
                                 jbd2_journal_abort(journal, -EIO);
                                 continue;
@@ -625,11 +607,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                         jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
                                 (unsigned long long)descriptor->b_blocknr,
                                 descriptor->b_data);
-                        header = (journal_header_t *)descriptor->b_data;
-                        header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
-                        header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
-                        header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
-
                         tagp = &descriptor->b_data[sizeof(journal_header_t)];
                         space_left = descriptor->b_size -
                                                 sizeof(journal_header_t);
@@ -721,7 +698,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
                         tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
 
-                        jbd2_descr_block_csum_set(journal, descriptor);
+                        jbd2_descriptor_block_csum_set(journal, descriptor);
 start_journal_io:
                         for (i = 0; i < bufs; i++) {
                                 struct buffer_head *bh = wbuf[i];
fs/jbd2/journal.c

@@ -805,10 +805,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
  * But we don't bother doing that, so there will be coherency problems with
  * mmaps of blockdevs which hold live JBD-controlled filesystems.
  */
-struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
+struct buffer_head *
+jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
 {
+        journal_t *journal = transaction->t_journal;
         struct buffer_head *bh;
         unsigned long long blocknr;
+        journal_header_t *header;
         int err;
 
         err = jbd2_journal_next_log_block(journal, &blocknr);
@@ -821,12 +824,31 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
                 return NULL;
         lock_buffer(bh);
         memset(bh->b_data, 0, journal->j_blocksize);
+        header = (journal_header_t *)bh->b_data;
+        header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
+        header->h_blocktype = cpu_to_be32(type);
+        header->h_sequence = cpu_to_be32(transaction->t_tid);
         set_buffer_uptodate(bh);
         unlock_buffer(bh);
         BUFFER_TRACE(bh, "return this buffer");
         return bh;
 }
 
+void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
+{
+        struct jbd2_journal_block_tail *tail;
+        __u32 csum;
+
+        if (!jbd2_journal_has_csum_v2or3(j))
+                return;
+
+        tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
+                        sizeof(struct jbd2_journal_block_tail));
+        tail->t_checksum = 0;
+        csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
+        tail->t_checksum = cpu_to_be32(csum);
+}
+
 /*
  * Return tid of the oldest transaction in the journal and block in the journal
  * where the transaction starts.
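After this refactor, every descriptor-type block (descriptor, revoke, commit) comes out of jbd2_journal_get_descriptor_buffer() with its common header already filled in, which is what let the per-caller header code in commit.c and revoke.c be deleted. A compilable userspace model of that initialization; the types and helper name are illustrative, though the magic constant is the real JBD2_MAGIC_NUMBER:

    #include <stdint.h>
    #include <string.h>
    #include <arpa/inet.h>    /* htonl() as a stand-in for cpu_to_be32() */

    struct journal_header {
            uint32_t h_magic;
            uint32_t h_blocktype;
            uint32_t h_sequence;
    };

    #define JOURNAL_MAGIC 0xc03b3998U    /* JBD2_MAGIC_NUMBER */

    /* Zero the block and stamp the shared header, as the new allocator does
     * for whichever block type the caller asks for. */
    static void init_descriptor(void *buf, size_t size, int type, uint32_t tid)
    {
            struct journal_header *h = buf;

            memset(buf, 0, size);
            h->h_magic = htonl(JOURNAL_MAGIC);
            h->h_blocktype = htonl((uint32_t)type);
            h->h_sequence = htonl(tid);
    }

    int main(void)
    {
            unsigned char block[64];

            init_descriptor(block, sizeof(block), 1 /* illustrative type code */, 7);
            return 0;
    }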
@@ -1408,11 +1430,12 @@ out:
 /**
  * jbd2_mark_journal_empty() - Mark on disk journal as empty.
  * @journal: The journal to update.
+ * @write_op: With which operation should we write the journal sb
  *
  * Update a journal's dynamic superblock fields to show that journal is empty.
  * Write updated superblock to disk waiting for IO to complete.
  */
-static void jbd2_mark_journal_empty(journal_t *journal)
+static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
 {
         journal_superblock_t *sb = journal->j_superblock;
 
@@ -1430,7 +1453,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
         sb->s_start    = cpu_to_be32(0);
         read_unlock(&journal->j_state_lock);
 
-        jbd2_write_superblock(journal, WRITE_FUA);
+        jbd2_write_superblock(journal, write_op);
 
         /* Log is no longer empty */
         write_lock(&journal->j_state_lock);
@@ -1716,7 +1739,13 @@ int jbd2_journal_destroy(journal_t *journal)
         if (journal->j_sb_buffer) {
                 if (!is_journal_aborted(journal)) {
                         mutex_lock(&journal->j_checkpoint_mutex);
-                        jbd2_mark_journal_empty(journal);
+
+                        write_lock(&journal->j_state_lock);
+                        journal->j_tail_sequence =
+                                ++journal->j_transaction_sequence;
+                        write_unlock(&journal->j_state_lock);
+
+                        jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
                         mutex_unlock(&journal->j_checkpoint_mutex);
                 } else
                         err = -EIO;
@@ -1975,7 +2004,7 @@ int jbd2_journal_flush(journal_t *journal)
          * the magic code for a fully-recovered superblock.  Any future
          * commits of data to the journal will restore the current
          * s_start value. */
-        jbd2_mark_journal_empty(journal);
+        jbd2_mark_journal_empty(journal, WRITE_FUA);
         mutex_unlock(&journal->j_checkpoint_mutex);
         write_lock(&journal->j_state_lock);
         J_ASSERT(!journal->j_running_transaction);
@@ -2021,7 +2050,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
         if (write) {
                 /* Lock to make assertions happy... */
                 mutex_lock(&journal->j_checkpoint_mutex);
-                jbd2_mark_journal_empty(journal);
+                jbd2_mark_journal_empty(journal, WRITE_FUA);
                 mutex_unlock(&journal->j_checkpoint_mutex);
         }
 
@@ -2565,7 +2594,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
 restart:
         spin_lock(&journal->j_list_lock);
         /* Is commit writing out inode - we have to wait */
-        if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
+        if (jinode->i_flags & JI_COMMIT_RUNNING) {
                 wait_queue_head_t *wq;
                 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
                 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
fs/jbd2/recovery.c

@@ -174,8 +174,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
         return 0;
 }
 
-static int jbd2_descr_block_csum_verify(journal_t *j,
-                                        void *buf)
+static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
 {
         struct jbd2_journal_block_tail *tail;
         __be32 provided;
@@ -522,8 +521,8 @@ static int do_one_pass(journal_t *journal,
                                 descr_csum_size =
                                         sizeof(struct jbd2_journal_block_tail);
                         if (descr_csum_size > 0 &&
-                            !jbd2_descr_block_csum_verify(journal,
-                                                          bh->b_data)) {
+                            !jbd2_descriptor_block_csum_verify(journal,
+                                                               bh->b_data)) {
                                 printk(KERN_ERR "JBD2: Invalid checksum "
                                        "recovering block %lu in log\n",
                                         next_log_block);
@@ -811,26 +810,6 @@ static int do_one_pass(journal_t *journal,
         return err;
 }
 
-static int jbd2_revoke_block_csum_verify(journal_t *j,
-                                         void *buf)
-{
-        struct jbd2_journal_revoke_tail *tail;
-        __be32 provided;
-        __u32 calculated;
-
-        if (!jbd2_journal_has_csum_v2or3(j))
-                return 1;
-
-        tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
-                        sizeof(struct jbd2_journal_revoke_tail));
-        provided = tail->r_checksum;
-        tail->r_checksum = 0;
-        calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
-        tail->r_checksum = provided;
-
-        return provided == cpu_to_be32(calculated);
-}
-
 /* Scan a revoke record, marking all blocks mentioned as revoked. */
 
 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
@@ -846,11 +825,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
         offset = sizeof(jbd2_journal_revoke_header_t);
         rcount = be32_to_cpu(header->r_count);
 
-        if (!jbd2_revoke_block_csum_verify(journal, header))
+        if (!jbd2_descriptor_block_csum_verify(journal, header))
                 return -EFSBADCRC;
 
         if (jbd2_journal_has_csum_v2or3(journal))
-                csum_size = sizeof(struct jbd2_journal_revoke_tail);
+                csum_size = sizeof(struct jbd2_journal_block_tail);
         if (rcount > journal->j_blocksize - csum_size)
                 return -EINVAL;
         max = rcount;
fs/jbd2/revoke.c

@@ -122,11 +122,11 @@ struct jbd2_revoke_table_s
 
 
 #ifdef __KERNEL__
-static void write_one_revoke_record(journal_t *, transaction_t *,
+static void write_one_revoke_record(transaction_t *,
                                     struct list_head *,
                                     struct buffer_head **, int *,
-                                    struct jbd2_revoke_record_s *, int);
-static void flush_descriptor(journal_t *, struct buffer_head *, int, int);
+                                    struct jbd2_revoke_record_s *);
+static void flush_descriptor(journal_t *, struct buffer_head *, int);
 #endif
 
 /* Utility functions to maintain the revoke table */
@@ -519,11 +519,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
  * Write revoke records to the journal for all entries in the current
  * revoke hash, deleting the entries as we go.
  */
-void jbd2_journal_write_revoke_records(journal_t *journal,
-                                       transaction_t *transaction,
-                                       struct list_head *log_bufs,
-                                       int write_op)
+void jbd2_journal_write_revoke_records(transaction_t *transaction,
                                        struct list_head *log_bufs)
 {
+        journal_t *journal = transaction->t_journal;
         struct buffer_head *descriptor;
         struct jbd2_revoke_record_s *record;
         struct jbd2_revoke_table_s *revoke;
@@ -544,16 +543,15 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
                 while (!list_empty(hash_list)) {
                         record = (struct jbd2_revoke_record_s *)
                                 hash_list->next;
-                        write_one_revoke_record(journal, transaction, log_bufs,
-                                                &descriptor, &offset,
-                                                record, write_op);
+                        write_one_revoke_record(transaction, log_bufs,
                                                 &descriptor, &offset, record);
                         count++;
                         list_del(&record->hash);
                         kmem_cache_free(jbd2_revoke_record_cache, record);
                 }
         }
         if (descriptor)
-                flush_descriptor(journal, descriptor, offset, write_op);
+                flush_descriptor(journal, descriptor, offset);
         jbd_debug(1, "Wrote %d revoke records\n", count);
 }
 
@@ -562,18 +560,16 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
  * block if the old one is full or if we have not already created one.
  */
 
-static void write_one_revoke_record(journal_t *journal,
-                                    transaction_t *transaction,
+static void write_one_revoke_record(transaction_t *transaction,
                                     struct list_head *log_bufs,
                                     struct buffer_head **descriptorp,
                                     int *offsetp,
-                                    struct jbd2_revoke_record_s *record,
-                                    int write_op)
+                                    struct jbd2_revoke_record_s *record)
 {
+        journal_t *journal = transaction->t_journal;
         int csum_size = 0;
         struct buffer_head *descriptor;
         int sz, offset;
-        journal_header_t *header;
 
         /* If we are already aborting, this all becomes a noop.  We
            still need to go round the loop in
@@ -587,7 +583,7 @@ static void write_one_revoke_record(journal_t *journal,
 
         /* Do we need to leave space at the end for a checksum? */
         if (jbd2_journal_has_csum_v2or3(journal))
-                csum_size = sizeof(struct jbd2_journal_revoke_tail);
+                csum_size = sizeof(struct jbd2_journal_block_tail);
 
         if (jbd2_has_feature_64bit(journal))
                 sz = 8;
@@ -597,19 +593,16 @@ static void write_one_revoke_record(journal_t *journal,
         /* Make sure we have a descriptor with space left for the record */
         if (descriptor) {
                 if (offset + sz > journal->j_blocksize - csum_size) {
-                        flush_descriptor(journal, descriptor, offset, write_op);
+                        flush_descriptor(journal, descriptor, offset);
                         descriptor = NULL;
                 }
         }
 
         if (!descriptor) {
-                descriptor = jbd2_journal_get_descriptor_buffer(journal);
+                descriptor = jbd2_journal_get_descriptor_buffer(transaction,
+                                                        JBD2_REVOKE_BLOCK);
                 if (!descriptor)
                         return;
-                header = (journal_header_t *)descriptor->b_data;
-                header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
-                header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
-                header->h_sequence  = cpu_to_be32(transaction->t_tid);
 
                 /* Record it so that we can wait for IO completion later */
                 BUFFER_TRACE(descriptor, "file in log_bufs");
@@ -630,21 +623,6 @@ static void write_one_revoke_record(journal_t *journal,
         *offsetp = offset;
 }
 
-static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
-{
-        struct jbd2_journal_revoke_tail *tail;
-        __u32 csum;
-
-        if (!jbd2_journal_has_csum_v2or3(j))
-                return;
-
-        tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
-                        sizeof(struct jbd2_journal_revoke_tail));
-        tail->r_checksum = 0;
-        csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
-        tail->r_checksum = cpu_to_be32(csum);
-}
-
 /*
  * Flush a revoke descriptor out to the journal.  If we are aborting,
  * this is a noop; otherwise we are generating a buffer which needs to
@@ -654,7 +632,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
 
 static void flush_descriptor(journal_t *journal,
                              struct buffer_head *descriptor,
-                             int offset, int write_op)
+                             int offset)
 {
         jbd2_journal_revoke_header_t *header;
 
@@ -665,12 +643,12 @@ static void flush_descriptor(journal_t *journal,
 
         header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
         header->r_count = cpu_to_be32(offset);
-        jbd2_revoke_csum_set(journal, descriptor);
+        jbd2_descriptor_block_csum_set(journal, descriptor);
 
         set_buffer_jwrite(descriptor);
         BUFFER_TRACE(descriptor, "write");
         set_buffer_dirty(descriptor);
-        write_dirty_buffer(descriptor, write_op);
+        write_dirty_buffer(descriptor, WRITE_SYNC);
 }
 #endif
|
@ -966,14 +966,8 @@ repeat:
|
||||||
if (!frozen_buffer) {
|
if (!frozen_buffer) {
|
||||||
JBUFFER_TRACE(jh, "allocate memory for buffer");
|
JBUFFER_TRACE(jh, "allocate memory for buffer");
|
||||||
jbd_unlock_bh_state(bh);
|
jbd_unlock_bh_state(bh);
|
||||||
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
|
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
|
||||||
if (!frozen_buffer) {
|
GFP_NOFS | __GFP_NOFAIL);
|
||||||
printk(KERN_ERR "%s: OOM for frozen_buffer\n",
|
|
||||||
__func__);
|
|
||||||
JBUFFER_TRACE(jh, "oom!");
|
|
||||||
error = -ENOMEM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
jh->b_frozen_data = frozen_buffer;
|
jh->b_frozen_data = frozen_buffer;
|
||||||
|
@ -1226,15 +1220,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
repeat:
|
repeat:
|
||||||
if (!jh->b_committed_data) {
|
if (!jh->b_committed_data)
|
||||||
committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
|
committed_data = jbd2_alloc(jh2bh(jh)->b_size,
|
||||||
if (!committed_data) {
|
GFP_NOFS|__GFP_NOFAIL);
|
||||||
printk(KERN_ERR "%s: No memory for committed data\n",
|
|
||||||
__func__);
|
|
||||||
err = -ENOMEM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
jbd_lock_bh_state(bh);
|
jbd_lock_bh_state(bh);
|
||||||
if (!jh->b_committed_data) {
|
if (!jh->b_committed_data) {
|
||||||
|
|
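Both transaction.c hunks above replace a "print an OOM message and fail the handle" path with GFP_NOFS | __GFP_NOFAIL, on the grounds that erroring out here would abort the journal over a transient allocation failure. The userspace analogue of a must-succeed allocation is a retry loop (illustrative only; the kernel pushes the looping into the page allocator itself):

    #include <stdlib.h>
    #include <string.h>

    /* Keep retrying until the allocator succeeds: the moral equivalent
     * of passing __GFP_NOFAIL to jbd2_alloc(). */
    static void *alloc_nofail(size_t size)
    {
            void *p;

            while (!(p = malloc(size)))
                    ;    /* in the kernel, the allocator blocks and retries */
            return p;
    }

    int main(void)
    {
            char *buf = alloc_nofail(4096);

            memset(buf, 0, 4096);
            free(buf);
            return 0;
    }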
fs/mbcache.c: 1151 lines changed (file diff suppressed because it is too large).
include/linux/jbd2.h

@@ -200,7 +200,7 @@ typedef struct journal_block_tag_s
         __be32          t_blocknr_high; /* most-significant high 32bits. */
 } journal_block_tag_t;
 
-/* Tail of descriptor block, for checksumming */
+/* Tail of descriptor or revoke block, for checksumming */
 struct jbd2_journal_block_tail {
         __be32          t_checksum;     /* crc32c(uuid+descr_block) */
 };
@@ -215,11 +215,6 @@ typedef struct jbd2_journal_revoke_header_s
         __be32           r_count;       /* Count of bytes used in the block */
 } jbd2_journal_revoke_header_t;
 
-/* Tail of revoke block, for checksumming */
-struct jbd2_journal_revoke_tail {
-        __be32          r_checksum;     /* crc32c(uuid+revoke_block) */
-};
-
 /* Definitions for the journal tag flags word: */
 #define JBD2_FLAG_ESCAPE                1       /* on-disk block is escaped */
 #define JBD2_FLAG_SAME_UUID             2       /* block has same uuid as previous */
@@ -1137,7 +1132,8 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh)
 }
 
 /* Log buffer allocation */
-struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
+struct buffer_head *jbd2_journal_get_descriptor_buffer(transaction_t *, int);
+void jbd2_descriptor_block_csum_set(journal_t *, struct buffer_head *);
 int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
 int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
                               unsigned long *block);
@@ -1327,10 +1323,8 @@ extern int jbd2_journal_init_revoke_caches(void);
 extern void        jbd2_journal_destroy_revoke(journal_t *);
 extern int         jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *);
 extern int         jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
-extern void        jbd2_journal_write_revoke_records(journal_t *journal,
-                                                     transaction_t *transaction,
-                                                     struct list_head *log_bufs,
-                                                     int write_op);
+extern void        jbd2_journal_write_revoke_records(transaction_t *transaction,
                                                      struct list_head *log_bufs);
 
 /* Recovery revoke support */
 extern int      jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
include/linux/mbcache.h

@@ -1,55 +1,52 @@
-/*
-  File: linux/mbcache.h
+#ifndef _LINUX_MBCACHE_H
+#define _LINUX_MBCACHE_H
 
-  (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
-*/
+#include <linux/hash.h>
+#include <linux/list_bl.h>
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <linux/fs.h>
+
+struct mb_cache;
+
 struct mb_cache_entry {
-        struct list_head                e_lru_list;
-        struct mb_cache                 *e_cache;
-        unsigned short                  e_used;
-        unsigned short                  e_queued;
+        /* List of entries in cache - protected by cache->c_list_lock */
+        struct list_head        e_list;
+        /* Hash table list - protected by hash chain bitlock */
+        struct hlist_bl_node    e_hash_list;
         atomic_t                e_refcnt;
-        struct block_device             *e_bdev;
-        sector_t                        e_block;
-        struct hlist_bl_node            e_block_list;
-        struct {
-                struct hlist_bl_node    o_list;
-                unsigned int            o_key;
-        } e_index;
-        struct hlist_bl_head            *e_block_hash_p;
-        struct hlist_bl_head            *e_index_hash_p;
+        /* Key in hash - stable during lifetime of the entry */
+        u32                     e_key;
+        u32                     e_referenced:1;
+        u32                     e_reusable:1;
+        /* Block number of hashed block - stable during lifetime of the entry */
+        sector_t                e_block;
 };
 
-struct mb_cache {
-        struct list_head                c_cache_list;
-        const char                      *c_name;
-        atomic_t                        c_entry_count;
-        int                             c_max_entries;
-        int                             c_bucket_bits;
-        struct kmem_cache               *c_entry_cache;
-        struct hlist_bl_head            *c_block_hash;
-        struct hlist_bl_head            *c_index_hash;
-};
-
-/* Functions on caches */
-
-struct mb_cache *mb_cache_create(const char *, int);
-void mb_cache_shrink(struct block_device *);
-void mb_cache_destroy(struct mb_cache *);
-
-/* Functions on cache entries */
-
-struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *, gfp_t);
-int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *,
-                          sector_t, unsigned int);
-void mb_cache_entry_release(struct mb_cache_entry *);
-void mb_cache_entry_free(struct mb_cache_entry *);
-struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *,
-                                          struct block_device *,
-                                          sector_t);
+struct mb_cache *mb_cache_create(int bucket_bits);
+void mb_cache_destroy(struct mb_cache *cache);
+
+int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+                          sector_t block, bool reusable);
+void __mb_cache_entry_free(struct mb_cache_entry *entry);
+static inline int mb_cache_entry_put(struct mb_cache *cache,
+                                     struct mb_cache_entry *entry)
+{
+        if (!atomic_dec_and_test(&entry->e_refcnt))
+                return 0;
+        __mb_cache_entry_free(entry);
+        return 1;
+}
+
+void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
+                                 sector_t block);
+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+                                          sector_t block);
 struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
-                                                 struct block_device *,
-                                                 unsigned int);
-struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *,
-                                                struct block_device *,
-                                                unsigned int);
+                                                 u32 key);
+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
+                                                struct mb_cache_entry *entry);
+void mb_cache_entry_touch(struct mb_cache *cache,
+                          struct mb_cache_entry *entry);
+
+#endif /* _LINUX_MBCACHE_H */
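For orientation, this is roughly how the new interface above is driven by a caller such as ext4_xattr_cache_find(): entries for one key are walked with find_first/find_next, which hand back referenced entries (find_next drops the reference on the previous one), and a hit is eventually released with mb_cache_entry_put(). An in-kernel sketch, not a standalone program; error handling elided and the helper name is invented for illustration:

    #include <linux/mbcache.h>

    static struct mb_cache_entry *find_block(struct mb_cache *cache, u32 hash,
                                             sector_t wanted)
    {
            struct mb_cache_entry *ce;

            for (ce = mb_cache_entry_find_first(cache, hash); ce != NULL;
                 ce = mb_cache_entry_find_next(cache, ce)) {
                    if (ce->e_block == wanted) {
                            mb_cache_entry_touch(cache, ce); /* mark referenced for LRU */
                            return ce;  /* caller must mb_cache_entry_put() */
                    }
            }
            return NULL;
    }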