Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (64 commits)
  ext4: Update documentation about quota mount options
  ext4: replace MAX_DEFRAG_SIZE with EXT_MAX_BLOCK
  ext4: Fix the alloc on close after a truncate heuristic
  ext4: Add a tracepoint for ext4_alloc_da_blocks()
  ext4: store EXT4_EXT_MIGRATE in i_state instead of i_flags
  ext4: limit block allocations for indirect-block files to < 2^32
  ext4: Fix different block exchange issue in EXT4_IOC_MOVE_EXT
  ext4: Add null extent check to ext_get_path
  ext4: Replace BUG_ON() with ext4_error() in move_extents.c
  ext4: Replace get_ext_path macro with an inline function
  ext4: Fix include/trace/events/ext4.h to work with Systemtap
  ext4: Fix initialization of s_flex_groups
  ext4: Always set dx_node's fake_dirent explicitly.
  ext4: Fix async commit mode to be safe by using a barrier
  ext4: Don't update superblock write time when filesystem is read-only
  ext4: Clarify the locking details in mballoc
  ext4: check for need init flag in ext4_mb_load_buddy
  ext4: move ext4_mb_init_group() function earlier in the mballoc.c
  ext4: Make non-journal fsync work properly
  ext4: Assure that metadata blocks are written during fsync in no journal mode
  ...
This commit is contained in:
Linus Torvalds 2009-09-18 10:56:26 -07:00
commit 3530c18862
25 changed files with 1005 additions and 606 deletions

View File

@ -134,15 +134,9 @@ ro Mount filesystem read only. Note that ext4 will
mount options "ro,noload" can be used to prevent mount options "ro,noload" can be used to prevent
writes to the filesystem. writes to the filesystem.
journal_checksum Enable checksumming of the journal transactions.
This will allow the recovery code in e2fsck and the
kernel to detect corruption in the kernel. It is a
compatible change and will be ignored by older kernels.
journal_async_commit Commit block can be written to disk without waiting journal_async_commit Commit block can be written to disk without waiting
for descriptor blocks. If enabled older kernels cannot for descriptor blocks. If enabled older kernels cannot
mount the device. This will enable 'journal_checksum' mount the device.
internally.
journal=update Update the ext4 file system's journal to the current journal=update Update the ext4 file system's journal to the current
format. format.
@ -263,10 +257,18 @@ resuid=n The user ID which may use the reserved blocks.
sb=n Use alternate superblock at this location. sb=n Use alternate superblock at this location.
quota quota These options are ignored by the filesystem. They
noquota noquota are used only by quota tools to recognize volumes
grpquota grpquota where quota should be turned on. See documentation
usrquota usrquota in the quota-tools package for more details
(http://sourceforge.net/projects/linuxquota).
jqfmt=<quota type> These options tell filesystem details about quota
usrjquota=<file> so that quota information can be properly updated
grpjquota=<file> during journal replay. They replace the above
quota options. See documentation in the quota-tools
package for more details
(http://sourceforge.net/projects/linuxquota).
bh (*) ext4 associates buffer heads to data pages to bh (*) ext4 associates buffer heads to data pages to
nobh (a) cache disk block mapping information nobh (a) cache disk block mapping information

View File

@ -37,7 +37,7 @@ config EXT4DEV_COMPAT
To enable backwards compatibility so that systems that are To enable backwards compatibility so that systems that are
still expecting to mount ext4 filesystems using ext4dev, still expecting to mount ext4 filesystems using ext4dev,
chose Y here. This feature will go away by 2.6.31, so choose Y here. This feature will go away by 2.6.31, so
please arrange to get your userspace programs fixed! please arrange to get your userspace programs fixed!
config EXT4_FS_XATTR config EXT4_FS_XATTR
@ -77,3 +77,12 @@ config EXT4_FS_SECURITY
If you are not using a security module that requires using If you are not using a security module that requires using
extended attributes for file security labels, say N. extended attributes for file security labels, say N.
config EXT4_DEBUG
bool "EXT4 debugging support"
depends on EXT4_FS
help
Enables run-time debugging support for the ext4 filesystem.
If you select Y here, then you will be able to turn on debugging
with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"

View File

@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
* new bitmap information * new bitmap information
*/ */
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
ext4_mb_update_group_info(grp, blocks_freed); grp->bb_free += blocks_freed;
up_write(&grp->alloc_sem); up_write(&grp->alloc_sem);
/* We dirtied the bitmap block */ /* We dirtied the bitmap block */

View File

@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t;
/* prefer goal again. length */ /* prefer goal again. length */
#define EXT4_MB_HINT_MERGE 1 #define EXT4_MB_HINT_MERGE 0x0001
/* blocks already reserved */ /* blocks already reserved */
#define EXT4_MB_HINT_RESERVED 2 #define EXT4_MB_HINT_RESERVED 0x0002
/* metadata is being allocated */ /* metadata is being allocated */
#define EXT4_MB_HINT_METADATA 4 #define EXT4_MB_HINT_METADATA 0x0004
/* first blocks in the file */ /* first blocks in the file */
#define EXT4_MB_HINT_FIRST 8 #define EXT4_MB_HINT_FIRST 0x0008
/* search for the best chunk */ /* search for the best chunk */
#define EXT4_MB_HINT_BEST 16 #define EXT4_MB_HINT_BEST 0x0010
/* data is being allocated */ /* data is being allocated */
#define EXT4_MB_HINT_DATA 32 #define EXT4_MB_HINT_DATA 0x0020
/* don't preallocate (for tails) */ /* don't preallocate (for tails) */
#define EXT4_MB_HINT_NOPREALLOC 64 #define EXT4_MB_HINT_NOPREALLOC 0x0040
/* allocate for locality group */ /* allocate for locality group */
#define EXT4_MB_HINT_GROUP_ALLOC 128 #define EXT4_MB_HINT_GROUP_ALLOC 0x0080
/* allocate goal blocks or none */ /* allocate goal blocks or none */
#define EXT4_MB_HINT_GOAL_ONLY 256 #define EXT4_MB_HINT_GOAL_ONLY 0x0100
/* goal is meaningful */ /* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL 512 #define EXT4_MB_HINT_TRY_GOAL 0x0200
/* blocks already pre-reserved by delayed allocation */ /* blocks already pre-reserved by delayed allocation */
#define EXT4_MB_DELALLOC_RESERVED 1024 #define EXT4_MB_DELALLOC_RESERVED 0x0400
/* We are doing stream allocation */
#define EXT4_MB_STREAM_ALLOC 0x0800
struct ext4_allocation_request { struct ext4_allocation_request {
@ -111,6 +113,21 @@ struct ext4_allocation_request {
unsigned int flags; unsigned int flags;
}; };
/*
* For delayed allocation tracking
*/
struct mpage_da_data {
struct inode *inode;
sector_t b_blocknr; /* start block number of extent */
size_t b_size; /* size of extent */
unsigned long b_state; /* state of the extent */
unsigned long first_page, next_page; /* extent of pages */
struct writeback_control *wbc;
int io_done;
int pages_written;
int retval;
};
/* /*
* Special inodes numbers * Special inodes numbers
*/ */
@ -251,7 +268,6 @@ struct flex_groups {
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
@ -289,6 +305,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ #define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */
/* Used to pass group descriptor data when online resize is done */ /* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input { struct ext4_new_group_input {
@ -386,6 +403,9 @@ struct ext4_mount_options {
#endif #endif
}; };
/* Max physical block we can address w/o extents */
#define EXT4_MAX_BLOCK_FILE_PHYS 0xFFFFFFFF
/* /*
* Structure of an inode on the disk * Structure of an inode on the disk
*/ */
@ -456,7 +476,6 @@ struct move_extent {
__u64 len; /* block length to be moved */ __u64 len; /* block length to be moved */
__u64 moved_len; /* moved block length */ __u64 moved_len; /* moved block length */
}; };
#define MAX_DEFRAG_SIZE ((1UL<<31) - 1)
#define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
@ -694,7 +713,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
@ -841,6 +859,7 @@ struct ext4_sb_info {
unsigned long s_gdb_count; /* Number of group descriptor blocks */ unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */ unsigned long s_desc_per_block; /* Number of group descriptors per block */
ext4_group_t s_groups_count; /* Number of groups in the fs */ ext4_group_t s_groups_count; /* Number of groups in the fs */
ext4_group_t s_blockfile_groups;/* Groups acceptable for non-extent files */
unsigned long s_overhead_last; /* Last calculated overhead */ unsigned long s_overhead_last; /* Last calculated overhead */
unsigned long s_blocks_last; /* Last seen block count */ unsigned long s_blocks_last; /* Last seen block count */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
@ -950,6 +969,7 @@ struct ext4_sb_info {
atomic_t s_mb_lost_chunks; atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated; atomic_t s_mb_preallocated;
atomic_t s_mb_discarded; atomic_t s_mb_discarded;
atomic_t s_lock_busy;
/* locality groups */ /* locality groups */
struct ext4_locality_group *s_locality_groups; struct ext4_locality_group *s_locality_groups;
@ -1340,8 +1360,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *,
ext4_fsblk_t, unsigned long, int, unsigned long *); ext4_fsblk_t, unsigned long, int, unsigned long *);
extern int ext4_mb_add_groupinfo(struct super_block *sb, extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc); ext4_group_t i, struct ext4_group_desc *desc);
extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
ext4_grpblk_t add);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
extern void ext4_mb_put_buddy_cache_lock(struct super_block *, extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
ext4_group_t, int); ext4_group_t, int);
@ -1367,6 +1385,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_can_truncate(struct inode *inode); extern int ext4_can_truncate(struct inode *inode);
extern void ext4_truncate(struct inode *); extern void ext4_truncate(struct inode *);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *); extern void ext4_set_inode_flags(struct inode *);
extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode); extern int ext4_alloc_da_blocks(struct inode *inode);
@ -1575,15 +1594,18 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
struct ext4_group_info { struct ext4_group_info {
unsigned long bb_state; unsigned long bb_state;
struct rb_root bb_free_root; struct rb_root bb_free_root;
unsigned short bb_first_free; ext4_grpblk_t bb_first_free; /* first free block */
unsigned short bb_free; ext4_grpblk_t bb_free; /* total free blocks */
unsigned short bb_fragments; ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
struct list_head bb_prealloc_list; struct list_head bb_prealloc_list;
#ifdef DOUBLE_CHECK #ifdef DOUBLE_CHECK
void *bb_bitmap; void *bb_bitmap;
#endif #endif
struct rw_semaphore alloc_sem; struct rw_semaphore alloc_sem;
unsigned short bb_counters[]; ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order.
* bb_counters[3] = 5 means
* 5 free 8-block regions. */
}; };
#define EXT4_GROUP_INFO_NEED_INIT_BIT 0 #define EXT4_GROUP_INFO_NEED_INIT_BIT 0
@ -1591,15 +1613,42 @@ struct ext4_group_info {
#define EXT4_MB_GRP_NEED_INIT(grp) \ #define EXT4_MB_GRP_NEED_INIT(grp) \
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
#define EXT4_MAX_CONTENTION 8
#define EXT4_CONTENTION_THRESHOLD 2
static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
ext4_group_t group) ext4_group_t group)
{ {
return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
} }
/*
 * Returns true if the filesystem is busy enough that attempts to
 * access the block group locks have run into contention, i.e. the
 * s_lock_busy counter currently exceeds EXT4_CONTENTION_THRESHOLD.
 */
static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
{
return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
}
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
{ {
spin_lock(ext4_group_lock_ptr(sb, group)); spinlock_t *lock = ext4_group_lock_ptr(sb, group);
if (spin_trylock(lock))
/*
* We're able to grab the lock right away, so drop the
* lock contention counter.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
else {
/*
* The lock is busy, so bump the contention counter,
* and then wait on the spin lock.
*/
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
EXT4_MAX_CONTENTION);
spin_lock(lock);
}
} }
static inline void ext4_unlock_group(struct super_block *sb, static inline void ext4_unlock_group(struct super_block *sb,

View File

@ -43,8 +43,7 @@
#define CHECK_BINSEARCH__ #define CHECK_BINSEARCH__
/* /*
* If EXT_DEBUG is defined you can use the 'extdebug' mount option * Turn on EXT_DEBUG to get lots of info about extents operations.
* to get lots of info about what's going on.
*/ */
#define EXT_DEBUG__ #define EXT_DEBUG__
#ifdef EXT_DEBUG #ifdef EXT_DEBUG
@ -138,6 +137,7 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
#define EXT_BREAK 1 #define EXT_BREAK 1
#define EXT_REPEAT 2 #define EXT_REPEAT 2
/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */
#define EXT_MAX_BLOCK 0xffffffff #define EXT_MAX_BLOCK 0xffffffff
/* /*

View File

@ -44,7 +44,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
handle, err); handle, err);
} }
else else
brelse(bh); bforget(bh);
return err; return err;
} }
@ -60,7 +60,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
handle, err); handle, err);
} }
else else
brelse(bh); bforget(bh);
return err; return err;
} }
@ -89,7 +89,10 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
ext4_journal_abort_handle(where, __func__, bh, ext4_journal_abort_handle(where, __func__, bh,
handle, err); handle, err);
} else { } else {
mark_buffer_dirty(bh); if (inode && bh)
mark_buffer_dirty_inode(bh, inode);
else
mark_buffer_dirty(bh);
if (inode && inode_needs_sync(inode)) { if (inode && inode_needs_sync(inode)) {
sync_dirty_buffer(bh); sync_dirty_buffer(bh);
if (buffer_req(bh) && !buffer_uptodate(bh)) { if (buffer_req(bh) && !buffer_uptodate(bh)) {

View File

@ -93,7 +93,9 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
} }
static int ext4_ext_journal_restart(handle_t *handle, int needed) static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
{ {
int err; int err;
@ -104,7 +106,14 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
err = ext4_journal_extend(handle, needed); err = ext4_journal_extend(handle, needed);
if (err <= 0) if (err <= 0)
return err; return err;
return ext4_journal_restart(handle, needed); err = ext4_truncate_restart_trans(handle, inode, needed);
/*
* We have dropped i_data_sem so someone might have cached again
* an extent we are going to truncate.
*/
ext4_ext_invalidate_cache(inode);
return err;
} }
/* /*
@ -220,57 +229,65 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
return newblock; return newblock;
} }
static int ext4_ext_space_block(struct inode *inode) static inline int ext4_ext_space_block(struct inode *inode, int check)
{ {
int size; int size;
size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
/ sizeof(struct ext4_extent); / sizeof(struct ext4_extent);
if (!check) {
#ifdef AGGRESSIVE_TEST #ifdef AGGRESSIVE_TEST
if (size > 6) if (size > 6)
size = 6; size = 6;
#endif #endif
}
return size; return size;
} }
static int ext4_ext_space_block_idx(struct inode *inode) static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
{ {
int size; int size;
size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
/ sizeof(struct ext4_extent_idx); / sizeof(struct ext4_extent_idx);
if (!check) {
#ifdef AGGRESSIVE_TEST #ifdef AGGRESSIVE_TEST
if (size > 5) if (size > 5)
size = 5; size = 5;
#endif #endif
}
return size; return size;
} }
static int ext4_ext_space_root(struct inode *inode) static inline int ext4_ext_space_root(struct inode *inode, int check)
{ {
int size; int size;
size = sizeof(EXT4_I(inode)->i_data); size = sizeof(EXT4_I(inode)->i_data);
size -= sizeof(struct ext4_extent_header); size -= sizeof(struct ext4_extent_header);
size /= sizeof(struct ext4_extent); size /= sizeof(struct ext4_extent);
if (!check) {
#ifdef AGGRESSIVE_TEST #ifdef AGGRESSIVE_TEST
if (size > 3) if (size > 3)
size = 3; size = 3;
#endif #endif
}
return size; return size;
} }
static int ext4_ext_space_root_idx(struct inode *inode) static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
{ {
int size; int size;
size = sizeof(EXT4_I(inode)->i_data); size = sizeof(EXT4_I(inode)->i_data);
size -= sizeof(struct ext4_extent_header); size -= sizeof(struct ext4_extent_header);
size /= sizeof(struct ext4_extent_idx); size /= sizeof(struct ext4_extent_idx);
if (!check) {
#ifdef AGGRESSIVE_TEST #ifdef AGGRESSIVE_TEST
if (size > 4) if (size > 4)
size = 4; size = 4;
#endif #endif
}
return size; return size;
} }
@ -284,9 +301,9 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
int lcap, icap, rcap, leafs, idxs, num; int lcap, icap, rcap, leafs, idxs, num;
int newextents = blocks; int newextents = blocks;
rcap = ext4_ext_space_root_idx(inode); rcap = ext4_ext_space_root_idx(inode, 0);
lcap = ext4_ext_space_block(inode); lcap = ext4_ext_space_block(inode, 0);
icap = ext4_ext_space_block_idx(inode); icap = ext4_ext_space_block_idx(inode, 0);
/* number of new leaf blocks needed */ /* number of new leaf blocks needed */
num = leafs = (newextents + lcap - 1) / lcap; num = leafs = (newextents + lcap - 1) / lcap;
@ -311,14 +328,14 @@ ext4_ext_max_entries(struct inode *inode, int depth)
if (depth == ext_depth(inode)) { if (depth == ext_depth(inode)) {
if (depth == 0) if (depth == 0)
max = ext4_ext_space_root(inode); max = ext4_ext_space_root(inode, 1);
else else
max = ext4_ext_space_root_idx(inode); max = ext4_ext_space_root_idx(inode, 1);
} else { } else {
if (depth == 0) if (depth == 0)
max = ext4_ext_space_block(inode); max = ext4_ext_space_block(inode, 1);
else else
max = ext4_ext_space_block_idx(inode); max = ext4_ext_space_block_idx(inode, 1);
} }
return max; return max;
@ -437,8 +454,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
idx_pblock(path->p_idx)); idx_pblock(path->p_idx));
} else if (path->p_ext) { } else if (path->p_ext) {
ext_debug(" %d:%d:%llu ", ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block), le32_to_cpu(path->p_ext->ee_block),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext), ext4_ext_get_actual_len(path->p_ext),
ext_pblock(path->p_ext)); ext_pblock(path->p_ext));
} else } else
@ -460,8 +478,11 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
eh = path[depth].p_hdr; eh = path[depth].p_hdr;
ex = EXT_FIRST_EXTENT(eh); ex = EXT_FIRST_EXTENT(eh);
ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_len(ex), ext_pblock(ex)); ext4_ext_get_actual_len(ex), ext_pblock(ex));
} }
ext_debug("\n"); ext_debug("\n");
@ -580,9 +601,10 @@ ext4_ext_binsearch(struct inode *inode,
} }
path->p_ext = l - 1; path->p_ext = l - 1;
ext_debug(" -> %d:%llu:%d ", ext_debug(" -> %d:%llu:[%d]%d ",
le32_to_cpu(path->p_ext->ee_block), le32_to_cpu(path->p_ext->ee_block),
ext_pblock(path->p_ext), ext_pblock(path->p_ext),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext)); ext4_ext_get_actual_len(path->p_ext));
#ifdef CHECK_BINSEARCH #ifdef CHECK_BINSEARCH
@ -612,7 +634,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
eh->eh_depth = 0; eh->eh_depth = 0;
eh->eh_entries = 0; eh->eh_entries = 0;
eh->eh_magic = EXT4_EXT_MAGIC; eh->eh_magic = EXT4_EXT_MAGIC;
eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
ext4_ext_invalidate_cache(inode); ext4_ext_invalidate_cache(inode);
return 0; return 0;
@ -837,7 +859,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh = ext_block_hdr(bh); neh = ext_block_hdr(bh);
neh->eh_entries = 0; neh->eh_entries = 0;
neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_depth = 0; neh->eh_depth = 0;
ex = EXT_FIRST_EXTENT(neh); ex = EXT_FIRST_EXTENT(neh);
@ -850,9 +872,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
path[depth].p_ext++; path[depth].p_ext++;
while (path[depth].p_ext <= while (path[depth].p_ext <=
EXT_MAX_EXTENT(path[depth].p_hdr)) { EXT_MAX_EXTENT(path[depth].p_hdr)) {
ext_debug("move %d:%llu:%d in new leaf %llu\n", ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
le32_to_cpu(path[depth].p_ext->ee_block), le32_to_cpu(path[depth].p_ext->ee_block),
ext_pblock(path[depth].p_ext), ext_pblock(path[depth].p_ext),
ext4_ext_is_uninitialized(path[depth].p_ext),
ext4_ext_get_actual_len(path[depth].p_ext), ext4_ext_get_actual_len(path[depth].p_ext),
newblock); newblock);
/*memmove(ex++, path[depth].p_ext++, /*memmove(ex++, path[depth].p_ext++,
@ -912,7 +935,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh = ext_block_hdr(bh); neh = ext_block_hdr(bh);
neh->eh_entries = cpu_to_le16(1); neh->eh_entries = cpu_to_le16(1);
neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
neh->eh_depth = cpu_to_le16(depth - i); neh->eh_depth = cpu_to_le16(depth - i);
fidx = EXT_FIRST_INDEX(neh); fidx = EXT_FIRST_INDEX(neh);
fidx->ei_block = border; fidx->ei_block = border;
@ -1037,9 +1060,9 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
/* old root could have indexes or leaves /* old root could have indexes or leaves
* so calculate e_max right way */ * so calculate e_max right way */
if (ext_depth(inode)) if (ext_depth(inode))
neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
else else
neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_magic = EXT4_EXT_MAGIC;
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
unlock_buffer(bh); unlock_buffer(bh);
@ -1054,7 +1077,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
goto out; goto out;
curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
curp->p_hdr->eh_entries = cpu_to_le16(1); curp->p_hdr->eh_entries = cpu_to_le16(1);
curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
@ -1580,9 +1603,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
/* try to insert block into found extent and return */ /* try to insert block into found extent and return */
if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append %d block to %d:%d (from %llu)\n", ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext), ext4_ext_get_actual_len(newext),
le32_to_cpu(ex->ee_block), le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_len(ex), ext_pblock(ex)); ext4_ext_get_actual_len(ex), ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth); err = ext4_ext_get_access(handle, inode, path + depth);
if (err) if (err)
@ -1651,9 +1676,10 @@ has_space:
if (!nearex) { if (!nearex) {
/* there is no extent in this leaf, create first one */ /* there is no extent in this leaf, create first one */
ext_debug("first extent in the leaf: %d:%llu:%d\n", ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
le32_to_cpu(newext->ee_block), le32_to_cpu(newext->ee_block),
ext_pblock(newext), ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext)); ext4_ext_get_actual_len(newext));
path[depth].p_ext = EXT_FIRST_EXTENT(eh); path[depth].p_ext = EXT_FIRST_EXTENT(eh);
} else if (le32_to_cpu(newext->ee_block) } else if (le32_to_cpu(newext->ee_block)
@ -1663,10 +1689,11 @@ has_space:
len = EXT_MAX_EXTENT(eh) - nearex; len = EXT_MAX_EXTENT(eh) - nearex;
len = (len - 1) * sizeof(struct ext4_extent); len = (len - 1) * sizeof(struct ext4_extent);
len = len < 0 ? 0 : len; len = len < 0 ? 0 : len;
ext_debug("insert %d:%llu:%d after: nearest 0x%p, " ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n", "move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block), le32_to_cpu(newext->ee_block),
ext_pblock(newext), ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext), ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2); nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 2, nearex + 1, len); memmove(nearex + 2, nearex + 1, len);
@ -1676,10 +1703,11 @@ has_space:
BUG_ON(newext->ee_block == nearex->ee_block); BUG_ON(newext->ee_block == nearex->ee_block);
len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
len = len < 0 ? 0 : len; len = len < 0 ? 0 : len;
ext_debug("insert %d:%llu:%d before: nearest 0x%p, " ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n", "move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block), le32_to_cpu(newext->ee_block),
ext_pblock(newext), ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext), ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2); nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 1, nearex, len); memmove(nearex + 1, nearex, len);
@ -2094,7 +2122,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
else else
uninitialized = 0; uninitialized = 0;
ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
uninitialized, ex_ee_len);
path[depth].p_ext = ex; path[depth].p_ext = ex;
a = ex_ee_block > start ? ex_ee_block : start; a = ex_ee_block > start ? ex_ee_block : start;
@ -2138,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
} }
credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
err = ext4_ext_journal_restart(handle, credits); err = ext4_ext_truncate_extend_restart(handle, inode, credits);
if (err) if (err)
goto out; goto out;
@ -2327,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
if (err == 0) { if (err == 0) {
ext_inode_hdr(inode)->eh_depth = 0; ext_inode_hdr(inode)->eh_depth = 0;
ext_inode_hdr(inode)->eh_max = ext_inode_hdr(inode)->eh_max =
cpu_to_le16(ext4_ext_space_root(inode)); cpu_to_le16(ext4_ext_space_root(inode, 0));
err = ext4_ext_dirty(handle, inode, path); err = ext4_ext_dirty(handle, inode, path);
} }
} }
@ -2743,6 +2772,7 @@ insert:
} else if (err) } else if (err)
goto fix_extent_len; goto fix_extent_len;
out: out:
ext4_ext_show_leaf(inode, path);
return err ? err : allocated; return err ? err : allocated;
fix_extent_len: fix_extent_len:
@ -2786,7 +2816,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
struct ext4_allocation_request ar; struct ext4_allocation_request ar;
__clear_bit(BH_New, &bh_result->b_state); __clear_bit(BH_New, &bh_result->b_state);
ext_debug("blocks %u/%u requested for inode %u\n", ext_debug("blocks %u/%u requested for inode %lu\n",
iblock, max_blocks, inode->i_ino); iblock, max_blocks, inode->i_ino);
/* check in cache */ /* check in cache */
@ -2849,7 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
newblock = iblock - ee_block + ee_start; newblock = iblock - ee_block + ee_start;
/* number of remaining blocks in the extent */ /* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block); allocated = ee_len - (iblock - ee_block);
ext_debug("%u fit into %lu:%d -> %llu\n", iblock, ext_debug("%u fit into %u:%d -> %llu\n", iblock,
ee_block, ee_len, newblock); ee_block, ee_len, newblock);
/* Do not put uninitialized extent in the cache */ /* Do not put uninitialized extent in the cache */
@ -2950,7 +2980,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
newblock = ext4_mb_new_blocks(handle, &ar, &err); newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock) if (!newblock)
goto out2; goto out2;
ext_debug("allocate new block: goal %llu, found %llu/%lu\n", ext_debug("allocate new block: goal %llu, found %llu/%u\n",
ar.goal, newblock, allocated); ar.goal, newblock, allocated);
/* try to insert new extent into found leaf and return */ /* try to insert new extent into found leaf and return */

View File

@ -50,7 +50,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
{ {
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int ret = 0; int err, ret = 0;
J_ASSERT(ext4_journal_current_handle() == NULL); J_ASSERT(ext4_journal_current_handle() == NULL);
@ -79,6 +79,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
goto out; goto out;
} }
if (!journal)
ret = sync_mapping_buffers(inode->i_mapping);
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
goto out; goto out;
@ -91,10 +94,12 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
.sync_mode = WB_SYNC_ALL, .sync_mode = WB_SYNC_ALL,
.nr_to_write = 0, /* sys_fsync did this */ .nr_to_write = 0, /* sys_fsync did this */
}; };
ret = sync_inode(inode, &wbc); err = sync_inode(inode, &wbc);
if (journal && (journal->j_flags & JBD2_BARRIER)) if (ret == 0)
blkdev_issue_flush(inode->i_sb->s_bdev, NULL); ret = err;
} }
out: out:
if (journal && (journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
return ret; return ret;
} }

View File

@ -1189,7 +1189,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
i, ext4_free_inodes_count(sb, gdp), x); (unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
bitmap_count += x; bitmap_count += x;
} }
brelse(bitmap_bh); brelse(bitmap_bh);

View File

@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
* so before we call here everything must be consistently dirtied against * so before we call here everything must be consistently dirtied against
* this transaction. * this transaction.
*/ */
static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
int nblocks)
{ {
int ret;
/*
* Drop i_data_sem to avoid deadlock with ext4_get_blocks At this
* moment, get_block can be called only for blocks inside i_size since
* page cache has been already dropped and writes are blocked by
* i_mutex. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL); BUG_ON(EXT4_JOURNAL(inode) == NULL);
jbd_debug(2, "restarting handle %p\n", handle); jbd_debug(2, "restarting handle %p\n", handle);
return ext4_journal_restart(handle, blocks_for_truncate(inode)); up_write(&EXT4_I(inode)->i_data_sem);
ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
down_write(&EXT4_I(inode)->i_data_sem);
return ret;
} }
/* /*
@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode,
int n = 0; int n = 0;
int final = 0; int final = 0;
if (i_block < 0) { if (i_block < direct_blocks) {
ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
} else if (i_block < direct_blocks) {
offsets[n++] = i_block; offsets[n++] = i_block;
final = direct_blocks; final = direct_blocks;
} else if ((i_block -= direct_blocks) < indirect_blocks) { } else if ((i_block -= direct_blocks) < indirect_blocks) {
@ -551,15 +562,21 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
* *
* Normally this function find the preferred place for block allocation, * Normally this function find the preferred place for block allocation,
* returns it. * returns it.
* Because this is only used for non-extent files, we limit the block nr
* to 32 bits.
*/ */
static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
Indirect *partial) Indirect *partial)
{ {
ext4_fsblk_t goal;
/* /*
* XXX need to get goal block from mballoc's data structures * XXX need to get goal block from mballoc's data structures
*/ */
return ext4_find_near(inode, partial); goal = ext4_find_near(inode, partial);
goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
return goal;
} }
/** /**
@ -640,6 +657,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
if (*err) if (*err)
goto failed_out; goto failed_out;
BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS);
target -= count; target -= count;
/* allocate blocks for indirect blocks */ /* allocate blocks for indirect blocks */
while (index < indirect_blks && count) { while (index < indirect_blks && count) {
@ -674,6 +693,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
ar.flags = EXT4_MB_HINT_DATA; ar.flags = EXT4_MB_HINT_DATA;
current_block = ext4_mb_new_blocks(handle, &ar, err); current_block = ext4_mb_new_blocks(handle, &ar, err);
BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS);
if (*err && (target == blks)) { if (*err && (target == blks)) {
/* /*
@ -762,8 +782,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
BUFFER_TRACE(bh, "call get_create_access"); BUFFER_TRACE(bh, "call get_create_access");
err = ext4_journal_get_create_access(handle, bh); err = ext4_journal_get_create_access(handle, bh);
if (err) { if (err) {
/* Don't brelse(bh) here; it's done in
* ext4_journal_forget() below */
unlock_buffer(bh); unlock_buffer(bh);
brelse(bh);
goto failed; goto failed;
} }
@ -1109,16 +1130,15 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
ext4_discard_preallocations(inode); ext4_discard_preallocations(inode);
} }
static int check_block_validity(struct inode *inode, sector_t logical, static int check_block_validity(struct inode *inode, const char *msg,
sector_t phys, int len) sector_t logical, sector_t phys, int len)
{ {
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
ext4_error(inode->i_sb, "check_block_validity", ext4_error(inode->i_sb, msg,
"inode #%lu logical block %llu mapped to %llu " "inode #%lu logical block %llu mapped to %llu "
"(size %d)", inode->i_ino, "(size %d)", inode->i_ino,
(unsigned long long) logical, (unsigned long long) logical,
(unsigned long long) phys, len); (unsigned long long) phys, len);
WARN_ON(1);
return -EIO; return -EIO;
} }
return 0; return 0;
@ -1170,8 +1190,8 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
up_read((&EXT4_I(inode)->i_data_sem)); up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) { if (retval > 0 && buffer_mapped(bh)) {
int ret = check_block_validity(inode, block, int ret = check_block_validity(inode, "file system corruption",
bh->b_blocknr, retval); block, bh->b_blocknr, retval);
if (ret != 0) if (ret != 0)
return ret; return ret;
} }
@ -1235,8 +1255,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
* i_data's format changing. Force the migrate * i_data's format changing. Force the migrate
* to fail by clearing migrate flags * to fail by clearing migrate flags
*/ */
EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
~EXT4_EXT_MIGRATE;
} }
} }
@ -1252,8 +1271,9 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
up_write((&EXT4_I(inode)->i_data_sem)); up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) { if (retval > 0 && buffer_mapped(bh)) {
int ret = check_block_validity(inode, block, int ret = check_block_validity(inode, "file system "
bh->b_blocknr, retval); "corruption after allocation",
block, bh->b_blocknr, retval);
if (ret != 0) if (ret != 0)
return ret; return ret;
} }
@ -1863,18 +1883,6 @@ static void ext4_da_page_release_reservation(struct page *page,
* Delayed allocation stuff * Delayed allocation stuff
*/ */
struct mpage_da_data {
struct inode *inode;
sector_t b_blocknr; /* start block number of extent */
size_t b_size; /* size of extent */
unsigned long b_state; /* state of the extent */
unsigned long first_page, next_page; /* extent of pages */
struct writeback_control *wbc;
int io_done;
int pages_written;
int retval;
};
/* /*
* mpage_da_submit_io - walks through extent of pages and try to write * mpage_da_submit_io - walks through extent of pages and try to write
* them with writepage() call back * them with writepage() call back
@ -2737,6 +2745,7 @@ static int ext4_da_writepages(struct address_space *mapping,
long pages_skipped; long pages_skipped;
int range_cyclic, cycled = 1, io_done = 0; int range_cyclic, cycled = 1, io_done = 0;
int needed_blocks, ret = 0, nr_to_writebump = 0; int needed_blocks, ret = 0, nr_to_writebump = 0;
loff_t range_start = wbc->range_start;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
trace_ext4_da_writepages(inode, wbc); trace_ext4_da_writepages(inode, wbc);
@ -2850,6 +2859,7 @@ retry:
mpd.io_done = 1; mpd.io_done = 1;
ret = MPAGE_DA_EXTENT_TAIL; ret = MPAGE_DA_EXTENT_TAIL;
} }
trace_ext4_da_write_pages(inode, &mpd);
wbc->nr_to_write -= mpd.pages_written; wbc->nr_to_write -= mpd.pages_written;
ext4_journal_stop(handle); ext4_journal_stop(handle);
@ -2905,6 +2915,7 @@ out_writepages:
if (!no_nrwrite_index_update) if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0; wbc->no_nrwrite_index_update = 0;
wbc->nr_to_write -= nr_to_writebump; wbc->nr_to_write -= nr_to_writebump;
wbc->range_start = range_start;
trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
return ret; return ret;
} }
@ -3117,6 +3128,8 @@ out:
*/ */
int ext4_alloc_da_blocks(struct inode *inode) int ext4_alloc_da_blocks(struct inode *inode)
{ {
trace_ext4_alloc_da_blocks(inode);
if (!EXT4_I(inode)->i_reserved_data_blocks && if (!EXT4_I(inode)->i_reserved_data_blocks &&
!EXT4_I(inode)->i_reserved_meta_blocks) !EXT4_I(inode)->i_reserved_meta_blocks)
return 0; return 0;
@ -3659,7 +3672,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
ext4_handle_dirty_metadata(handle, inode, bh); ext4_handle_dirty_metadata(handle, inode, bh);
} }
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
ext4_journal_test_restart(handle, inode); ext4_truncate_restart_trans(handle, inode,
blocks_for_truncate(inode));
if (bh) { if (bh) {
BUFFER_TRACE(bh, "retaking write access"); BUFFER_TRACE(bh, "retaking write access");
ext4_journal_get_write_access(handle, bh); ext4_journal_get_write_access(handle, bh);
@ -3870,7 +3884,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
return; return;
if (try_to_extend_transaction(handle, inode)) { if (try_to_extend_transaction(handle, inode)) {
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
ext4_journal_test_restart(handle, inode); ext4_truncate_restart_trans(handle, inode,
blocks_for_truncate(inode));
} }
ext4_free_blocks(handle, inode, nr, 1, 1); ext4_free_blocks(handle, inode, nr, 1, 1);
@ -3958,8 +3973,7 @@ void ext4_truncate(struct inode *inode)
if (!ext4_can_truncate(inode)) if (!ext4_can_truncate(inode))
return; return;
if (ei->i_disksize && inode->i_size == 0 && if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
!test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
@ -4533,7 +4547,8 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/ */
static int ext4_do_update_inode(handle_t *handle, static int ext4_do_update_inode(handle_t *handle,
struct inode *inode, struct inode *inode,
struct ext4_iloc *iloc) struct ext4_iloc *iloc,
int do_sync)
{ {
struct ext4_inode *raw_inode = ext4_raw_inode(iloc); struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_inode_info *ei = EXT4_I(inode);
@ -4581,8 +4596,7 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_inode_blocks_set(handle, raw_inode, ei)) if (ext4_inode_blocks_set(handle, raw_inode, ei))
goto out_brelse; goto out_brelse;
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
/* clear the migrate flag in the raw_inode */ raw_inode->i_flags = cpu_to_le32(ei->i_flags);
raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD)) cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high = raw_inode->i_file_acl_high =
@ -4635,10 +4649,22 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
} }
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); /*
rc = ext4_handle_dirty_metadata(handle, inode, bh); * If we're not using a journal and we were called from
if (!err) * ext4_write_inode() to sync the inode (making do_sync true),
err = rc; * we can just use sync_dirty_buffer() directly to do our dirty
* work. Testing s_journal here is a bit redundant but it's
* worth it to avoid potential future trouble.
*/
if (EXT4_SB(inode->i_sb)->s_journal == NULL && do_sync) {
BUFFER_TRACE(bh, "call sync_dirty_buffer");
sync_dirty_buffer(bh);
} else {
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
rc = ext4_handle_dirty_metadata(handle, inode, bh);
if (!err)
err = rc;
}
ei->i_state &= ~EXT4_STATE_NEW; ei->i_state &= ~EXT4_STATE_NEW;
out_brelse: out_brelse:
@ -4684,19 +4710,32 @@ out_brelse:
*/ */
int ext4_write_inode(struct inode *inode, int wait) int ext4_write_inode(struct inode *inode, int wait)
{ {
int err;
if (current->flags & PF_MEMALLOC) if (current->flags & PF_MEMALLOC)
return 0; return 0;
if (ext4_journal_current_handle()) { if (EXT4_SB(inode->i_sb)->s_journal) {
jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); if (ext4_journal_current_handle()) {
dump_stack(); jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
return -EIO; dump_stack();
return -EIO;
}
if (!wait)
return 0;
err = ext4_force_commit(inode->i_sb);
} else {
struct ext4_iloc iloc;
err = ext4_get_inode_loc(inode, &iloc);
if (err)
return err;
err = ext4_do_update_inode(EXT4_NOJOURNAL_HANDLE,
inode, &iloc, wait);
} }
return err;
if (!wait)
return 0;
return ext4_force_commit(inode->i_sb);
} }
/* /*
@ -4990,7 +5029,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
get_bh(iloc->bh); get_bh(iloc->bh);
/* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
err = ext4_do_update_inode(handle, inode, iloc); err = ext4_do_update_inode(handle, inode, iloc, 0);
put_bh(iloc->bh); put_bh(iloc->bh);
return err; return err;
} }
@ -5281,12 +5320,21 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
else else
len = PAGE_CACHE_SIZE; len = PAGE_CACHE_SIZE;
lock_page(page);
/*
* return if we have all the buffers mapped. This avoid
* the need to call write_begin/write_end which does a
* journal_start/journal_stop which can block and take
* long time
*/
if (page_has_buffers(page)) { if (page_has_buffers(page)) {
/* return if we have all the buffers mapped */
if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
ext4_bh_unmapped)) ext4_bh_unmapped)) {
unlock_page(page);
goto out_unlock; goto out_unlock;
}
} }
unlock_page(page);
/* /*
* OK, we need to fill the hole... Do write_begin write_end * OK, we need to fill the hole... Do write_begin write_end
* to do block allocation/reservation.We are not holding * to do block allocation/reservation.We are not holding

View File

@ -243,10 +243,9 @@ setversion_out:
me.donor_start, me.len, &me.moved_len); me.donor_start, me.len, &me.moved_len);
fput(donor_filp); fput(donor_filp);
if (!err) if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
if (copy_to_user((struct move_extent *)arg, return -EFAULT;
&me, sizeof(me)))
return -EFAULT;
return err; return err;
} }

View File

@ -22,6 +22,7 @@
*/ */
#include "mballoc.h" #include "mballoc.h"
#include <linux/debugfs.h>
#include <trace/events/ext4.h> #include <trace/events/ext4.h>
/* /*
@ -622,13 +623,13 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
/* FIXME!! need more doc */ /* FIXME!! need more doc */
static void ext4_mb_mark_free_simple(struct super_block *sb, static void ext4_mb_mark_free_simple(struct super_block *sb,
void *buddy, unsigned first, int len, void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
struct ext4_group_info *grp) struct ext4_group_info *grp)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned short min; ext4_grpblk_t min;
unsigned short max; ext4_grpblk_t max;
unsigned short chunk; ext4_grpblk_t chunk;
unsigned short border; unsigned short border;
BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
@ -662,10 +663,10 @@ void ext4_mb_generate_buddy(struct super_block *sb,
void *buddy, void *bitmap, ext4_group_t group) void *buddy, void *bitmap, ext4_group_t group)
{ {
struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct ext4_group_info *grp = ext4_get_group_info(sb, group);
unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
unsigned short i = 0; ext4_grpblk_t i = 0;
unsigned short first; ext4_grpblk_t first;
unsigned short len; ext4_grpblk_t len;
unsigned free = 0; unsigned free = 0;
unsigned fragments = 0; unsigned fragments = 0;
unsigned long long period = get_cycles(); unsigned long long period = get_cycles();
@ -743,7 +744,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
char *data; char *data;
char *bitmap; char *bitmap;
mb_debug("init page %lu\n", page->index); mb_debug(1, "init page %lu\n", page->index);
inode = page->mapping->host; inode = page->mapping->host;
sb = inode->i_sb; sb = inode->i_sb;
@ -822,7 +823,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
set_bitmap_uptodate(bh[i]); set_bitmap_uptodate(bh[i]);
bh[i]->b_end_io = end_buffer_read_sync; bh[i]->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh[i]); submit_bh(READ, bh[i]);
mb_debug("read bitmap for group %u\n", first_group + i); mb_debug(1, "read bitmap for group %u\n", first_group + i);
} }
/* wait for I/O completion */ /* wait for I/O completion */
@ -862,12 +863,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
if ((first_block + i) & 1) { if ((first_block + i) & 1) {
/* this is block of buddy */ /* this is block of buddy */
BUG_ON(incore == NULL); BUG_ON(incore == NULL);
mb_debug("put buddy for group %u in page %lu/%x\n", mb_debug(1, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize); group, page->index, i * blocksize);
grinfo = ext4_get_group_info(sb, group); grinfo = ext4_get_group_info(sb, group);
grinfo->bb_fragments = 0; grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0, memset(grinfo->bb_counters, 0,
sizeof(unsigned short)*(sb->s_blocksize_bits+2)); sizeof(*grinfo->bb_counters) *
(sb->s_blocksize_bits+2));
/* /*
* incore got set to the group block bitmap below * incore got set to the group block bitmap below
*/ */
@ -878,7 +880,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
} else { } else {
/* this is block of bitmap */ /* this is block of bitmap */
BUG_ON(incore != NULL); BUG_ON(incore != NULL);
mb_debug("put bitmap for group %u in page %lu/%x\n", mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
group, page->index, i * blocksize); group, page->index, i * blocksize);
/* see comments in ext4_mb_put_pa() */ /* see comments in ext4_mb_put_pa() */
@ -908,6 +910,100 @@ out:
return err; return err;
} }
/*
 * ext4_mb_init_group - set up the buddy cache pages for a single group
 * @sb: super block of the filesystem
 * @group: group whose block bitmap and buddy data should be initialized
 *
 * Takes the buddy cache lock for @group, and, if the group still has its
 * "need init" state set, runs ext4_mb_init_cache() first on the page that
 * holds the group's block bitmap and then on the page that holds its buddy
 * data (skipped when both live in the same page).
 *
 * Returns 0 on success or when the group turned out to be initialized
 * already, the error returned by ext4_mb_init_cache() if it fails, or
 * -EIO when a page could not be obtained or is not uptodate afterwards.
 * The buddy cache lock and any page references taken here are released
 * before returning, on both the success and the failure paths.
 */
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{
int ret = 0;
void *bitmap;
int blocks_per_page;
int block, pnum, poff;
int num_grp_locked = 0;
struct ext4_group_info *this_grp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct inode *inode = sbi->s_buddy_cache;
struct page *page = NULL, *bitmap_page = NULL;
mb_debug(1, "init group %u\n", group);
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
this_grp = ext4_get_group_info(sb, group);
/*
 * This ensures that we don't reinit the buddy cache
 * page which map to the group from which we are already
 * allocating. If we are looking at the buddy cache we would
 * have taken a reference using ext4_mb_load_buddy and that
 * would have taken the alloc_sem lock.
 */
num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
/* Re-check the flag now that the buddy cache lock is held. */
if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
/*
 * somebody initialized the group
 * return without doing anything
 */
ret = 0;
goto err;
}
/*
 * the buddy cache inode stores the block bitmap
 * and buddy information in consecutive blocks.
 * So for each group we need two blocks.
 */
block = group * 2;
/* page index and block-within-page of the bitmap block */
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
/* NULL incore: this pass initializes the bitmap block */
ret = ext4_mb_init_cache(page, NULL);
if (ret) {
unlock_page(page);
goto err;
}
unlock_page(page);
}
/* no page, or the page did not end up uptodate: report an I/O error */
if (page == NULL || !PageUptodate(page)) {
ret = -EIO;
goto err;
}
mark_page_accessed(page);
/*
 * Remember the bitmap page separately; "page" is reused for the buddy
 * page below and both references are dropped at the err label.
 */
bitmap_page = page;
/* address of this group's bitmap block inside the bitmap page */
bitmap = page_address(page) + (poff * sb->s_blocksize);
/* init buddy cache */
block++;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (page == bitmap_page) {
/*
 * If both the bitmap and buddy are in
 * the same page we don't need to force
 * init the buddy
 */
unlock_page(page);
} else if (page) {
BUG_ON(page->mapping != inode->i_mapping);
/* the in-page bitmap is handed over as the incore argument */
ret = ext4_mb_init_cache(page, bitmap);
if (ret) {
unlock_page(page);
goto err;
}
unlock_page(page);
}
if (page == NULL || !PageUptodate(page)) {
ret = -EIO;
goto err;
}
mark_page_accessed(page);
/*
 * Common exit, reached on success as well as on failure: release the
 * buddy cache lock and drop whichever page references are still held.
 */
err:
ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
if (bitmap_page)
page_cache_release(bitmap_page);
if (page)
page_cache_release(page);
return ret;
}
static noinline_for_stack int static noinline_for_stack int
ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
struct ext4_buddy *e4b) struct ext4_buddy *e4b)
@ -922,7 +1018,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct inode *inode = sbi->s_buddy_cache; struct inode *inode = sbi->s_buddy_cache;
mb_debug("load group %u\n", group); mb_debug(1, "load group %u\n", group);
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
grp = ext4_get_group_info(sb, group); grp = ext4_get_group_info(sb, group);
@ -941,8 +1037,26 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* groups mapped by the page is blocked * groups mapped by the page is blocked
* till we are done with allocation * till we are done with allocation
*/ */
repeat_load_buddy:
down_read(e4b->alloc_semp); down_read(e4b->alloc_semp);
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
/* we need to check for group need init flag
* with alloc_semp held so that we can be sure
* that new blocks didn't get added to the group
* when we are loading the buddy cache
*/
up_read(e4b->alloc_semp);
/*
* we need full data about the group
* to make a good selection
*/
ret = ext4_mb_init_group(sb, group);
if (ret)
return ret;
goto repeat_load_buddy;
}
/* /*
* the buddy cache inode stores the block bitmap * the buddy cache inode stores the block bitmap
* and buddy information in consecutive blocks. * and buddy information in consecutive blocks.
@ -1360,7 +1474,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
ac->alloc_semp = e4b->alloc_semp; ac->alloc_semp = e4b->alloc_semp;
e4b->alloc_semp = NULL; e4b->alloc_semp = NULL;
/* store last allocated for subsequent stream allocation */ /* store last allocated for subsequent stream allocation */
if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
spin_lock(&sbi->s_md_lock); spin_lock(&sbi->s_md_lock);
sbi->s_mb_last_group = ac->ac_f_ex.fe_group; sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
sbi->s_mb_last_start = ac->ac_f_ex.fe_start; sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
@ -1837,97 +1951,6 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
} }
static noinline_for_stack
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
{
int ret;
void *bitmap;
int blocks_per_page;
int block, pnum, poff;
int num_grp_locked = 0;
struct ext4_group_info *this_grp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct inode *inode = sbi->s_buddy_cache;
struct page *page = NULL, *bitmap_page = NULL;
mb_debug("init group %lu\n", group);
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
this_grp = ext4_get_group_info(sb, group);
/*
* This ensures we don't add group
* to this buddy cache via resize
*/
num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
/*
* somebody initialized the group
* return without doing anything
*/
ret = 0;
goto err;
}
/*
* the buddy cache inode stores the block bitmap
* and buddy information in consecutive blocks.
* So for each group we need two blocks.
*/
block = group * 2;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
ret = ext4_mb_init_cache(page, NULL);
if (ret) {
unlock_page(page);
goto err;
}
unlock_page(page);
}
if (page == NULL || !PageUptodate(page)) {
ret = -EIO;
goto err;
}
mark_page_accessed(page);
bitmap_page = page;
bitmap = page_address(page) + (poff * sb->s_blocksize);
/* init buddy cache */
block++;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
if (page == bitmap_page) {
/*
* If both the bitmap and buddy are in
* the same page we don't need to force
* init the buddy
*/
unlock_page(page);
} else if (page) {
BUG_ON(page->mapping != inode->i_mapping);
ret = ext4_mb_init_cache(page, bitmap);
if (ret) {
unlock_page(page);
goto err;
}
unlock_page(page);
}
if (page == NULL || !PageUptodate(page)) {
ret = -EIO;
goto err;
}
mark_page_accessed(page);
err:
ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
if (bitmap_page)
page_cache_release(bitmap_page);
if (page)
page_cache_release(page);
return ret;
}
static noinline_for_stack int static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac) ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{ {
@ -1938,11 +1961,14 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
struct ext4_sb_info *sbi; struct ext4_sb_info *sbi;
struct super_block *sb; struct super_block *sb;
struct ext4_buddy e4b; struct ext4_buddy e4b;
loff_t size, isize;
sb = ac->ac_sb; sb = ac->ac_sb;
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
ngroups = ext4_get_groups_count(sb); ngroups = ext4_get_groups_count(sb);
/* non-extent files are limited to low blocks/groups */
if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL))
ngroups = sbi->s_blockfile_groups;
BUG_ON(ac->ac_status == AC_STATUS_FOUND); BUG_ON(ac->ac_status == AC_STATUS_FOUND);
/* first, try the goal */ /* first, try the goal */
@ -1974,20 +2000,16 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
} }
bsbits = ac->ac_sb->s_blocksize_bits; bsbits = ac->ac_sb->s_blocksize_bits;
/* if stream allocation is enabled, use global goal */
size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
isize = i_size_read(ac->ac_inode) >> bsbits;
if (size < isize)
size = isize;
if (size < sbi->s_mb_stream_request && /* if stream allocation is enabled, use global goal */
(ac->ac_flags & EXT4_MB_HINT_DATA)) { if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
/* TBD: may be hot point */ /* TBD: may be hot point */
spin_lock(&sbi->s_md_lock); spin_lock(&sbi->s_md_lock);
ac->ac_g_ex.fe_group = sbi->s_mb_last_group; ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
ac->ac_g_ex.fe_start = sbi->s_mb_last_start; ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
spin_unlock(&sbi->s_md_lock); spin_unlock(&sbi->s_md_lock);
} }
/* Let's just scan groups to find more-less suitable blocks */ /* Let's just scan groups to find more-less suitable blocks */
cr = ac->ac_2order ? 0 : 1; cr = ac->ac_2order ? 0 : 1;
/* /*
@ -2015,27 +2037,6 @@ repeat:
if (grp->bb_free == 0) if (grp->bb_free == 0)
continue; continue;
/*
* if the group is already init we check whether it is
* a good group and if not we don't load the buddy
*/
if (EXT4_MB_GRP_NEED_INIT(grp)) {
/*
* we need full data about the group
* to make a good selection
*/
err = ext4_mb_init_group(sb, group);
if (err)
goto out;
}
/*
* If the particular group doesn't satisfy our
* criteria we continue with the next group
*/
if (!ext4_mb_good_group(ac, group, cr))
continue;
err = ext4_mb_load_buddy(sb, group, &e4b); err = ext4_mb_load_buddy(sb, group, &e4b);
if (err) if (err)
goto out; goto out;
@ -2156,7 +2157,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN) { if (v == SEQ_START_TOKEN) {
seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s "
"%-5s %-2s %-5s %-5s %-5s %-6s\n", "%-5s %-2s %-6s %-5s %-5s %-6s\n",
"pid", "inode", "original", "goal", "result", "found", "pid", "inode", "original", "goal", "result", "found",
"grps", "cr", "flags", "merge", "tail", "broken"); "grps", "cr", "flags", "merge", "tail", "broken");
return 0; return 0;
@ -2164,7 +2165,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v)
if (hs->op == EXT4_MB_HISTORY_ALLOC) { if (hs->op == EXT4_MB_HISTORY_ALLOC) {
fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u "
"%-5u %-5s %-5u %-6u\n"; "0x%04x %-5s %-5u %-6u\n";
sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
hs->result.fe_start, hs->result.fe_len, hs->result.fe_start, hs->result.fe_len,
hs->result.fe_logical); hs->result.fe_logical);
@ -2205,7 +2206,7 @@ static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v)
{ {
} }
static struct seq_operations ext4_mb_seq_history_ops = { static const struct seq_operations ext4_mb_seq_history_ops = {
.start = ext4_mb_seq_history_start, .start = ext4_mb_seq_history_start,
.next = ext4_mb_seq_history_next, .next = ext4_mb_seq_history_next,
.stop = ext4_mb_seq_history_stop, .stop = ext4_mb_seq_history_stop,
@ -2287,7 +2288,7 @@ static ssize_t ext4_mb_seq_history_write(struct file *file,
return count; return count;
} }
static struct file_operations ext4_mb_seq_history_fops = { static const struct file_operations ext4_mb_seq_history_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = ext4_mb_seq_history_open, .open = ext4_mb_seq_history_open,
.read = seq_read, .read = seq_read,
@ -2328,7 +2329,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
struct ext4_buddy e4b; struct ext4_buddy e4b;
struct sg { struct sg {
struct ext4_group_info info; struct ext4_group_info info;
unsigned short counters[16]; ext4_grpblk_t counters[16];
} sg; } sg;
group--; group--;
@ -2366,7 +2367,7 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
{ {
} }
static struct seq_operations ext4_mb_seq_groups_ops = { static const struct seq_operations ext4_mb_seq_groups_ops = {
.start = ext4_mb_seq_groups_start, .start = ext4_mb_seq_groups_start,
.next = ext4_mb_seq_groups_next, .next = ext4_mb_seq_groups_next,
.stop = ext4_mb_seq_groups_stop, .stop = ext4_mb_seq_groups_stop,
@ -2387,7 +2388,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
} }
static struct file_operations ext4_mb_seq_groups_fops = { static const struct file_operations ext4_mb_seq_groups_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = ext4_mb_seq_groups_open, .open = ext4_mb_seq_groups_open,
.read = seq_read, .read = seq_read,
@ -2532,7 +2533,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
init_rwsem(&meta_group_info[i]->alloc_sem); init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root.rb_node = NULL;; meta_group_info[i]->bb_free_root.rb_node = NULL;
#ifdef DOUBLE_CHECK #ifdef DOUBLE_CHECK
{ {
@ -2558,26 +2559,15 @@ exit_meta_group_info:
return -ENOMEM; return -ENOMEM;
} /* ext4_mb_add_groupinfo */ } /* ext4_mb_add_groupinfo */
/*
 * ext4_mb_update_group_info - adjust the free-block count of a group
 * @grp: in-memory info of the group to update
 * @add: amount added to the group's bb_free counter
 *
 * Updates an already existing group; this function is used for online
 * resize.
 */
void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
{
	grp->bb_free = grp->bb_free + add;
}
static int ext4_mb_init_backend(struct super_block *sb) static int ext4_mb_init_backend(struct super_block *sb)
{ {
ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i; ext4_group_t i;
int metalen;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
int num_meta_group_infos; int num_meta_group_infos;
int num_meta_group_infos_max; int num_meta_group_infos_max;
int array_size; int array_size;
struct ext4_group_info **meta_group_info;
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
/* This is the number of blocks used by GDT */ /* This is the number of blocks used by GDT */
@ -2622,22 +2612,6 @@ static int ext4_mb_init_backend(struct super_block *sb)
goto err_freesgi; goto err_freesgi;
} }
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb);
for (i = 0; i < num_meta_group_infos; i++) {
if ((i + 1) == num_meta_group_infos)
metalen = sizeof(*meta_group_info) *
(ngroups -
(i << EXT4_DESC_PER_BLOCK_BITS(sb)));
meta_group_info = kmalloc(metalen, GFP_KERNEL);
if (meta_group_info == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
"buddy group\n");
goto err_freemeta;
}
sbi->s_group_info[i] = meta_group_info;
}
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
desc = ext4_get_group_desc(sb, i, NULL); desc = ext4_get_group_desc(sb, i, NULL);
if (desc == NULL) { if (desc == NULL) {
@ -2655,7 +2629,6 @@ err_freebuddy:
while (i-- > 0) while (i-- > 0)
kfree(ext4_get_group_info(sb, i)); kfree(ext4_get_group_info(sb, i));
i = num_meta_group_infos; i = num_meta_group_infos;
err_freemeta:
while (i-- > 0) while (i-- > 0)
kfree(sbi->s_group_info[i]); kfree(sbi->s_group_info[i]);
iput(sbi->s_buddy_cache); iput(sbi->s_buddy_cache);
@ -2672,14 +2645,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned max; unsigned max;
int ret; int ret;
i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_offsets == NULL) { if (sbi->s_mb_offsets == NULL) {
return -ENOMEM; return -ENOMEM;
} }
i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_maxs == NULL) { if (sbi->s_mb_maxs == NULL) {
kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_offsets);
@ -2758,7 +2731,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
kmem_cache_free(ext4_pspace_cachep, pa); kmem_cache_free(ext4_pspace_cachep, pa);
} }
if (count) if (count)
mb_debug("mballoc: %u PAs left\n", count); mb_debug(1, "mballoc: %u PAs left\n", count);
} }
@ -2839,7 +2812,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
list_for_each_safe(l, ltmp, &txn->t_private_list) { list_for_each_safe(l, ltmp, &txn->t_private_list) {
entry = list_entry(l, struct ext4_free_data, list); entry = list_entry(l, struct ext4_free_data, list);
mb_debug("gonna free %u blocks in group %u (0x%p):", mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->count, entry->group, entry); entry->count, entry->group, entry);
err = ext4_mb_load_buddy(sb, entry->group, &e4b); err = ext4_mb_load_buddy(sb, entry->group, &e4b);
@ -2874,9 +2847,43 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
ext4_mb_release_desc(&e4b); ext4_mb_release_desc(&e4b);
} }
mb_debug("freed %u blocks in %u structures\n", count, count2); mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
} }
#ifdef CONFIG_EXT4_DEBUG
/* Debug threshold: mb_debug(n, ...) prints only when n <= mb_enable_debug. */
u8 mb_enable_debug __read_mostly;
/* debugfs handles: the "ext4" directory and its "mballoc-debug" file. */
static struct dentry *debugfs_dir;
static struct dentry *debugfs_debug;
/*
 * Create the "ext4" debugfs directory and, inside it, the "mballoc-debug"
 * u8 file backed by mb_enable_debug.  If the directory handle comes back
 * NULL the file is not created.  Failures are not reported to the caller.
 */
static void __init ext4_create_debugfs_entry(void)
{
	debugfs_dir = debugfs_create_dir("ext4", NULL);
	if (!debugfs_dir)
		return;

	debugfs_debug = debugfs_create_u8("mballoc-debug", S_IRUGO | S_IWUSR,
					  debugfs_dir, &mb_enable_debug);
}
/*
 * Tear down the debugfs entries: the "mballoc-debug" file handle first,
 * then the "ext4" directory handle it was created under.
 */
static void ext4_remove_debugfs_entry(void)
{
	debugfs_remove(debugfs_debug);
	debugfs_remove(debugfs_dir);
}
#else
/* Built without CONFIG_EXT4_DEBUG: there is no debugfs entry to create. */
static void __init ext4_create_debugfs_entry(void)
{
}
/* Built without CONFIG_EXT4_DEBUG: there is no debugfs entry to remove. */
static void ext4_remove_debugfs_entry(void)
{
}
#endif
int __init init_ext4_mballoc(void) int __init init_ext4_mballoc(void)
{ {
ext4_pspace_cachep = ext4_pspace_cachep =
@ -2904,6 +2911,7 @@ int __init init_ext4_mballoc(void)
kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_ac_cachep);
return -ENOMEM; return -ENOMEM;
} }
ext4_create_debugfs_entry();
return 0; return 0;
} }
@ -2917,6 +2925,7 @@ void exit_ext4_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep); kmem_cache_destroy(ext4_free_ext_cachep);
ext4_remove_debugfs_entry();
} }
@ -3061,7 +3070,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
else else
ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
mb_debug("#%u: goal %u blocks for locality group\n", mb_debug(1, "#%u: goal %u blocks for locality group\n",
current->pid, ac->ac_g_ex.fe_len); current->pid, ac->ac_g_ex.fe_len);
} }
@ -3180,23 +3189,18 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
ac->ac_o_ex.fe_logical < pa->pa_lstart)); ac->ac_o_ex.fe_logical < pa->pa_lstart));
/* skip PA normalized request doesn't overlap with */ /* skip PAs this normalized request doesn't overlap with */
if (pa->pa_lstart >= end) { if (pa->pa_lstart >= end || pa_end <= start) {
spin_unlock(&pa->pa_lock);
continue;
}
if (pa_end <= start) {
spin_unlock(&pa->pa_lock); spin_unlock(&pa->pa_lock);
continue; continue;
} }
BUG_ON(pa->pa_lstart <= start && pa_end >= end); BUG_ON(pa->pa_lstart <= start && pa_end >= end);
/* adjust start or end to be adjacent to this pa */
if (pa_end <= ac->ac_o_ex.fe_logical) { if (pa_end <= ac->ac_o_ex.fe_logical) {
BUG_ON(pa_end < start); BUG_ON(pa_end < start);
start = pa_end; start = pa_end;
} } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
BUG_ON(pa->pa_lstart > end); BUG_ON(pa->pa_lstart > end);
end = pa->pa_lstart; end = pa->pa_lstart;
} }
@ -3251,7 +3255,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
} }
mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
(unsigned) orig_size, (unsigned) start); (unsigned) orig_size, (unsigned) start);
} }
@ -3300,7 +3304,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
BUG_ON(pa->pa_free < len); BUG_ON(pa->pa_free < len);
pa->pa_free -= len; pa->pa_free -= len;
mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
} }
/* /*
@ -3324,7 +3328,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
* in on-disk bitmap -- see ext4_mb_release_context() * in on-disk bitmap -- see ext4_mb_release_context()
* Other CPUs are prevented from allocating from this pa by lg_mutex * Other CPUs are prevented from allocating from this pa by lg_mutex
*/ */
mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
} }
/* /*
@ -3382,6 +3386,11 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
continue; continue;
/* non-extent files can't have physical blocks past 2^32 */
if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) &&
pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
continue;
/* found preallocated blocks, use them */ /* found preallocated blocks, use them */
spin_lock(&pa->pa_lock); spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0 && pa->pa_free) { if (pa->pa_deleted == 0 && pa->pa_free) {
@ -3503,7 +3512,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
preallocated += len; preallocated += len;
count++; count++;
} }
mb_debug("prellocated %u for group %u\n", preallocated, group); mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
} }
static void ext4_mb_pa_callback(struct rcu_head *head) static void ext4_mb_pa_callback(struct rcu_head *head)
@ -3638,7 +3647,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa->pa_deleted = 0; pa->pa_deleted = 0;
pa->pa_type = MB_INODE_PA; pa->pa_type = MB_INODE_PA;
mb_debug("new inode pa %p: %llu/%u for %u\n", pa, mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart); pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_ext4_mb_new_inode_pa(ac, pa); trace_ext4_mb_new_inode_pa(ac, pa);
@ -3698,7 +3707,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_deleted = 0; pa->pa_deleted = 0;
pa->pa_type = MB_GROUP_PA; pa->pa_type = MB_GROUP_PA;
mb_debug("new group pa %p: %llu/%u for %u\n", pa, mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
pa->pa_pstart, pa->pa_len, pa->pa_lstart); pa->pa_pstart, pa->pa_len, pa->pa_lstart);
trace_ext4_mb_new_group_pa(ac, pa); trace_ext4_mb_new_group_pa(ac, pa);
@ -3777,7 +3786,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
next = mb_find_next_bit(bitmap_bh->b_data, end, bit); next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
le32_to_cpu(sbi->s_es->s_first_data_block); le32_to_cpu(sbi->s_es->s_first_data_block);
mb_debug(" free preallocated %u/%u in group %u\n", mb_debug(1, " free preallocated %u/%u in group %u\n",
(unsigned) start, (unsigned) next - bit, (unsigned) start, (unsigned) next - bit,
(unsigned) group); (unsigned) group);
free += next - bit; free += next - bit;
@ -3868,7 +3877,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
int busy = 0; int busy = 0;
int free = 0; int free = 0;
mb_debug("discard preallocation for group %u\n", group); mb_debug(1, "discard preallocation for group %u\n", group);
if (list_empty(&grp->bb_prealloc_list)) if (list_empty(&grp->bb_prealloc_list))
return 0; return 0;
@ -3992,7 +4001,7 @@ void ext4_discard_preallocations(struct inode *inode)
return; return;
} }
mb_debug("discard preallocation for inode %lu\n", inode->i_ino); mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
trace_ext4_discard_preallocations(inode); trace_ext4_discard_preallocations(inode);
INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&list);
@ -4097,7 +4106,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
{ {
BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
} }
#ifdef MB_DEBUG #ifdef CONFIG_EXT4_DEBUG
static void ext4_mb_show_ac(struct ext4_allocation_context *ac) static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
{ {
struct super_block *sb = ac->ac_sb; struct super_block *sb = ac->ac_sb;
@ -4139,14 +4148,14 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
ext4_get_group_no_and_offset(sb, pa->pa_pstart, ext4_get_group_no_and_offset(sb, pa->pa_pstart,
NULL, &start); NULL, &start);
spin_unlock(&pa->pa_lock); spin_unlock(&pa->pa_lock);
printk(KERN_ERR "PA:%lu:%d:%u \n", i, printk(KERN_ERR "PA:%u:%d:%u \n", i,
start, pa->pa_len); start, pa->pa_len);
} }
ext4_unlock_group(sb, i); ext4_unlock_group(sb, i);
if (grp->bb_free == 0) if (grp->bb_free == 0)
continue; continue;
printk(KERN_ERR "%lu: %d/%d \n", printk(KERN_ERR "%u: %d/%d \n",
i, grp->bb_free, grp->bb_fragments); i, grp->bb_free, grp->bb_fragments);
} }
printk(KERN_ERR "\n"); printk(KERN_ERR "\n");
@ -4174,17 +4183,27 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return; return;
size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
isize = i_size_read(ac->ac_inode) >> bsbits;
size = max(size, isize);
/* don't use group allocation for large files */
if (size >= sbi->s_mb_stream_request)
return;
if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
return; return;
size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
size = max(size, isize);
if ((size == isize) &&
!ext4_fs_is_busy(sbi) &&
(atomic_read(&ac->ac_inode->i_writecount) == 0)) {
ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
return;
}
/* don't use group allocation for large files */
if (size >= sbi->s_mb_stream_request) {
ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
return;
}
BUG_ON(ac->ac_lg != NULL); BUG_ON(ac->ac_lg != NULL);
/* /*
* locality group prealloc space are per cpu. The reason for having * locality group prealloc space are per cpu. The reason for having
@ -4246,7 +4265,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
* locality group. this is a policy, actually */ * locality group. this is a policy, actually */
ext4_mb_group_or_file(ac); ext4_mb_group_or_file(ac);
mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
"left: %u/%u, right %u/%u to %swritable\n", "left: %u/%u, right %u/%u to %swritable\n",
(unsigned) ar->len, (unsigned) ar->logical, (unsigned) ar->len, (unsigned) ar->logical,
(unsigned) ar->goal, ac->ac_flags, ac->ac_2order, (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
@ -4268,7 +4287,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
struct ext4_prealloc_space *pa, *tmp; struct ext4_prealloc_space *pa, *tmp;
struct ext4_allocation_context *ac; struct ext4_allocation_context *ac;
mb_debug("discard locality group preallocation\n"); mb_debug(1, "discard locality group preallocation\n");
INIT_LIST_HEAD(&discard_list); INIT_LIST_HEAD(&discard_list);
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);

View File

@ -37,11 +37,19 @@
/* /*
*/ */
#define MB_DEBUG__ #ifdef CONFIG_EXT4_DEBUG
#ifdef MB_DEBUG extern u8 mb_enable_debug;
#define mb_debug(fmt, a...) printk(fmt, ##a)
/*
 * mb_debug - emit an mballoc debug message when its level is enabled
 * @n: level of this message; it is printed only if (n) <= mb_enable_debug
 * @fmt, @a: printk-style format string and its arguments
 *
 * Each message is preceded by a KERN_DEBUG prefix naming the file, line
 * and function of the call site.  The prefix and the message body are
 * issued as two separate printk() calls.
 */
#define mb_debug(n, fmt, a...)						\
	do {								\
		if ((n) <= mb_enable_debug) {				\
			printk(KERN_DEBUG "(%s, %d): %s: ",		\
			       __FILE__, __LINE__, __func__);		\
			printk(fmt, ## a);				\
		}							\
	} while (0)
#else #else
#define mb_debug(fmt, a...) #define mb_debug(n, fmt, a...)
#endif #endif
/* /*
@ -128,8 +136,8 @@ struct ext4_prealloc_space {
unsigned pa_deleted; unsigned pa_deleted;
ext4_fsblk_t pa_pstart; /* phys. block */ ext4_fsblk_t pa_pstart; /* phys. block */
ext4_lblk_t pa_lstart; /* log. block */ ext4_lblk_t pa_lstart; /* log. block */
unsigned short pa_len; /* len of preallocated chunk */ ext4_grpblk_t pa_len; /* len of preallocated chunk */
unsigned short pa_free; /* how many blocks are free */ ext4_grpblk_t pa_free; /* how many blocks are free */
unsigned short pa_type; /* pa type. inode or group */ unsigned short pa_type; /* pa type. inode or group */
spinlock_t *pa_obj_lock; spinlock_t *pa_obj_lock;
struct inode *pa_inode; /* hack, for history only */ struct inode *pa_inode; /* hack, for history only */
@ -144,7 +152,7 @@ struct ext4_free_extent {
ext4_lblk_t fe_logical; ext4_lblk_t fe_logical;
ext4_grpblk_t fe_start; ext4_grpblk_t fe_start;
ext4_group_t fe_group; ext4_group_t fe_group;
int fe_len; ext4_grpblk_t fe_len;
}; };
/* /*

View File

@ -353,17 +353,16 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
down_write(&EXT4_I(inode)->i_data_sem); down_write(&EXT4_I(inode)->i_data_sem);
/* /*
* if EXT4_EXT_MIGRATE is cleared a block allocation * if EXT4_STATE_EXT_MIGRATE is cleared a block allocation
* happened after we started the migrate. We need to * happened after we started the migrate. We need to
* fail the migrate * fail the migrate
*/ */
if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) { if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) {
retval = -EAGAIN; retval = -EAGAIN;
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
goto err_out; goto err_out;
} else } else
EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags & EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
~EXT4_EXT_MIGRATE;
/* /*
* We have the extent map build with the tmp inode. * We have the extent map build with the tmp inode.
* Now copy the i_data across * Now copy the i_data across
@ -517,14 +516,15 @@ int ext4_ext_migrate(struct inode *inode)
* when we add extents we extend the journal * when we add extents we extend the journal
*/ */
/* /*
* Even though we take i_mutex we can still cause block allocation * Even though we take i_mutex we can still cause block
* via mmap write to holes. If we have allocated new blocks we fail * allocation via mmap write to holes. If we have allocated
* migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. * new blocks we fail migrate. New block allocation will
* The flag is updated with i_data_sem held to prevent racing with * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated
* block allocation. * with i_data_sem held to prevent racing with block
* allocation.
*/ */
down_read((&EXT4_I(inode)->i_data_sem)); down_read((&EXT4_I(inode)->i_data_sem));
EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE; EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE;
up_read((&EXT4_I(inode)->i_data_sem)); up_read((&EXT4_I(inode)->i_data_sem));
handle = ext4_journal_start(inode, 1); handle = ext4_journal_start(inode, 1);
@ -618,7 +618,7 @@ err_out:
tmp_inode->i_nlink = 0; tmp_inode->i_nlink = 0;
ext4_journal_stop(handle); ext4_journal_stop(handle);
unlock_new_inode(tmp_inode);
iput(tmp_inode); iput(tmp_inode);
return retval; return retval;

View File

@ -19,14 +19,31 @@
#include "ext4_extents.h" #include "ext4_extents.h"
#include "ext4.h" #include "ext4.h"
#define get_ext_path(path, inode, block, ret) \ /**
do { \ * get_ext_path - Find an extent path for designated logical block number.
path = ext4_ext_find_extent(inode, block, path); \ *
if (IS_ERR(path)) { \ * @inode: an inode which is searched
ret = PTR_ERR(path); \ * @lblock: logical block number to find an extent path
path = NULL; \ * @path: pointer to an extent path pointer (for output)
} \ *
} while (0) * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
*/
static inline int
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
	     struct ext4_ext_path **path)
{
	struct ext4_ext_path *found;

	found = ext4_ext_find_extent(inode, lblock, *path);

	/* Lookup failed: hand back a NULL path together with the error code. */
	if (IS_ERR(found)) {
		*path = NULL;
		return PTR_ERR(found);
	}

	*path = found;

	/* A path whose leaf level carries no extent is reported as -ENODATA. */
	if (found[ext_depth(inode)].p_ext == NULL)
		return -ENODATA;

	return 0;
}
/** /**
* copy_extent_status - Copy the extent's initialization status * copy_extent_status - Copy the extent's initialization status
@ -112,6 +129,31 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
return 1; return 1;
} }
/**
 * mext_check_null_inode - NULL check for two inodes
 *
 * @inode1:	first inode to check
 * @inode2:	second inode to check
 * @function:	name of the calling function, passed through to ext4_error()
 *
 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
 *
 * When exactly one inode is NULL, the problem is reported through
 * ext4_error() against the superblock of the other inode.  When both are
 * NULL there is no superblock to report against, so -EIO is returned
 * without logging rather than dereferencing a NULL inode.
 */
static int
mext_check_null_inode(struct inode *inode1, struct inode *inode2,
		      const char *function)
{
	if (inode1 != NULL && inode2 != NULL)
		return 0;

	/* Neither inode is usable: nothing can be dereferenced for logging. */
	if (inode1 == NULL && inode2 == NULL)
		return -EIO;

	if (inode1 == NULL)
		ext4_error(inode2->i_sb, function,
			"Both inodes should not be NULL: "
			"inode1 NULL inode2 %lu", inode2->i_ino);
	else
		ext4_error(inode1->i_sb, function,
			"Both inodes should not be NULL: "
			"inode1 %lu inode2 NULL", inode1->i_ino);

	return -EIO;
}
/** /**
* mext_double_down_read - Acquire two inodes' read semaphore * mext_double_down_read - Acquire two inodes' read semaphore
* *
@ -124,8 +166,6 @@ mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
{ {
struct inode *first = orig_inode, *second = donor_inode; struct inode *first = orig_inode, *second = donor_inode;
BUG_ON(orig_inode == NULL || donor_inode == NULL);
/* /*
* Use the inode number to provide the stable locking order instead * Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can * of its address, because the C language doesn't guarantee you can
@ -152,8 +192,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
{ {
struct inode *first = orig_inode, *second = donor_inode; struct inode *first = orig_inode, *second = donor_inode;
BUG_ON(orig_inode == NULL || donor_inode == NULL);
/* /*
* Use the inode number to provide the stable locking order instead * Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can * of its address, because the C language doesn't guarantee you can
@ -178,8 +216,6 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
static void static void
mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
{ {
BUG_ON(orig_inode == NULL || donor_inode == NULL);
up_read(&EXT4_I(orig_inode)->i_data_sem); up_read(&EXT4_I(orig_inode)->i_data_sem);
up_read(&EXT4_I(donor_inode)->i_data_sem); up_read(&EXT4_I(donor_inode)->i_data_sem);
} }
@ -194,8 +230,6 @@ mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
static void static void
mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
{ {
BUG_ON(orig_inode == NULL || donor_inode == NULL);
up_write(&EXT4_I(orig_inode)->i_data_sem); up_write(&EXT4_I(orig_inode)->i_data_sem);
up_write(&EXT4_I(donor_inode)->i_data_sem); up_write(&EXT4_I(donor_inode)->i_data_sem);
} }
@ -283,8 +317,8 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
} }
if (new_flag) { if (new_flag) {
get_ext_path(orig_path, orig_inode, eblock, err); err = get_ext_path(orig_inode, eblock, &orig_path);
if (orig_path == NULL) if (err)
goto out; goto out;
if (ext4_ext_insert_extent(handle, orig_inode, if (ext4_ext_insert_extent(handle, orig_inode,
@ -293,9 +327,9 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
} }
if (end_flag) { if (end_flag) {
get_ext_path(orig_path, orig_inode, err = get_ext_path(orig_inode,
le32_to_cpu(end_ext->ee_block) - 1, err); le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
if (orig_path == NULL) if (err)
goto out; goto out;
if (ext4_ext_insert_extent(handle, orig_inode, if (ext4_ext_insert_extent(handle, orig_inode,
@ -519,7 +553,15 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
* oext |-----------| * oext |-----------|
* new_ext |-------| * new_ext |-------|
*/ */
BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end); if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
ext4_error(orig_inode->i_sb, __func__,
"new_ext_end(%u) should be less than or equal to "
"oext->ee_block(%u) + oext_alen(%d) - 1",
new_ext_end, le32_to_cpu(oext->ee_block),
oext_alen);
ret = -EIO;
goto out;
}
/* /*
* Case: new_ext is smaller than original extent * Case: new_ext is smaller than original extent
@ -543,6 +585,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
ret = mext_insert_extents(handle, orig_inode, orig_path, o_start, ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
o_end, &start_ext, &new_ext, &end_ext); o_end, &start_ext, &new_ext, &end_ext);
out:
return ret; return ret;
} }
@ -554,8 +597,10 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
* @orig_off: block offset of original inode * @orig_off: block offset of original inode
* @donor_off: block offset of donor inode * @donor_off: block offset of donor inode
* @max_count: the maximum length of extents * @max_count: the maximum length of extents
*
* Return 0 on success, or a negative error value on failure.
*/ */
static void static int
mext_calc_swap_extents(struct ext4_extent *tmp_dext, mext_calc_swap_extents(struct ext4_extent *tmp_dext,
struct ext4_extent *tmp_oext, struct ext4_extent *tmp_oext,
ext4_lblk_t orig_off, ext4_lblk_t donor_off, ext4_lblk_t orig_off, ext4_lblk_t donor_off,
@ -564,6 +609,19 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
ext4_lblk_t diff, orig_diff; ext4_lblk_t diff, orig_diff;
struct ext4_extent dext_old, oext_old; struct ext4_extent dext_old, oext_old;
BUG_ON(orig_off != donor_off);
/* original and donor extents have to cover the same block offset */
if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
le32_to_cpu(tmp_oext->ee_block) +
ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
return -ENODATA;
if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
le32_to_cpu(tmp_dext->ee_block) +
ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
return -ENODATA;
dext_old = *tmp_dext; dext_old = *tmp_dext;
oext_old = *tmp_oext; oext_old = *tmp_oext;
@ -591,6 +649,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
copy_extent_status(&oext_old, tmp_dext); copy_extent_status(&oext_old, tmp_dext);
copy_extent_status(&dext_old, tmp_oext); copy_extent_status(&dext_old, tmp_oext);
return 0;
} }
/** /**
@ -631,13 +691,13 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
mext_double_down_write(orig_inode, donor_inode); mext_double_down_write(orig_inode, donor_inode);
/* Get the original extent for the block "orig_off" */ /* Get the original extent for the block "orig_off" */
get_ext_path(orig_path, orig_inode, orig_off, err); err = get_ext_path(orig_inode, orig_off, &orig_path);
if (orig_path == NULL) if (err)
goto out; goto out;
/* Get the donor extent for the head */ /* Get the donor extent for the head */
get_ext_path(donor_path, donor_inode, donor_off, err); err = get_ext_path(donor_inode, donor_off, &donor_path);
if (donor_path == NULL) if (err)
goto out; goto out;
depth = ext_depth(orig_inode); depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext; oext = orig_path[depth].p_ext;
@ -647,13 +707,28 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
dext = donor_path[depth].p_ext; dext = donor_path[depth].p_ext;
tmp_dext = *dext; tmp_dext = *dext;
mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off, count); donor_off, count);
if (err)
goto out;
/* Loop for the donor extents */ /* Loop for the donor extents */
while (1) { while (1) {
/* The extent for donor must be found. */ /* The extent for donor must be found. */
BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block)); if (!dext) {
ext4_error(donor_inode->i_sb, __func__,
"The extent for donor must be found");
err = -EIO;
goto out;
} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
ext4_error(donor_inode->i_sb, __func__,
"Donor offset(%u) and the first block of donor "
"extent(%u) should be equal",
donor_off,
le32_to_cpu(tmp_dext.ee_block));
err = -EIO;
goto out;
}
/* Set donor extent to orig extent */ /* Set donor extent to orig extent */
err = mext_leaf_block(handle, orig_inode, err = mext_leaf_block(handle, orig_inode,
@ -678,8 +753,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
if (orig_path) if (orig_path)
ext4_ext_drop_refs(orig_path); ext4_ext_drop_refs(orig_path);
get_ext_path(orig_path, orig_inode, orig_off, err); err = get_ext_path(orig_inode, orig_off, &orig_path);
if (orig_path == NULL) if (err)
goto out; goto out;
depth = ext_depth(orig_inode); depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext; oext = orig_path[depth].p_ext;
@ -692,9 +767,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
if (donor_path) if (donor_path)
ext4_ext_drop_refs(donor_path); ext4_ext_drop_refs(donor_path);
get_ext_path(donor_path, donor_inode, err = get_ext_path(donor_inode, donor_off, &donor_path);
donor_off, err); if (err)
if (donor_path == NULL)
goto out; goto out;
depth = ext_depth(donor_inode); depth = ext_depth(donor_inode);
dext = donor_path[depth].p_ext; dext = donor_path[depth].p_ext;
@ -705,9 +779,10 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
} }
tmp_dext = *dext; tmp_dext = *dext;
mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off, donor_off, count - replaced_count);
count - replaced_count); if (err)
goto out;
} }
out: out:
@ -740,7 +815,7 @@ out:
* on success, or a negative error value on failure. * on success, or a negative error value on failure.
*/ */
static int static int
move_extent_par_page(struct file *o_filp, struct inode *donor_inode, move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
pgoff_t orig_page_offset, int data_offset_in_page, pgoff_t orig_page_offset, int data_offset_in_page,
int block_len_in_page, int uninit) int block_len_in_page, int uninit)
{ {
@ -871,6 +946,7 @@ out:
if (PageLocked(page)) if (PageLocked(page))
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
ext4_journal_stop(handle);
} }
out2: out2:
ext4_journal_stop(handle); ext4_journal_stop(handle);
@ -897,6 +973,10 @@ mext_check_arguments(struct inode *orig_inode,
struct inode *donor_inode, __u64 orig_start, struct inode *donor_inode, __u64 orig_start,
__u64 donor_start, __u64 *len, __u64 moved_len) __u64 donor_start, __u64 *len, __u64 moved_len)
{ {
ext4_lblk_t orig_blocks, donor_blocks;
unsigned int blkbits = orig_inode->i_blkbits;
unsigned int blocksize = 1 << blkbits;
/* Regular file check */ /* Regular file check */
if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
ext4_debug("ext4 move extent: The argument files should be " ext4_debug("ext4 move extent: The argument files should be "
@ -960,54 +1040,58 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL; return -EINVAL;
} }
if ((orig_start > MAX_DEFRAG_SIZE) || if ((orig_start > EXT_MAX_BLOCK) ||
(donor_start > MAX_DEFRAG_SIZE) || (donor_start > EXT_MAX_BLOCK) ||
(*len > MAX_DEFRAG_SIZE) || (*len > EXT_MAX_BLOCK) ||
(orig_start + *len > MAX_DEFRAG_SIZE)) { (orig_start + *len > EXT_MAX_BLOCK)) {
ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
"[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK,
orig_inode->i_ino, donor_inode->i_ino); orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL; return -EINVAL;
} }
if (orig_inode->i_size > donor_inode->i_size) { if (orig_inode->i_size > donor_inode->i_size) {
if (orig_start >= donor_inode->i_size) { donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits;
/* TODO: eliminate this artificial restriction */
if (orig_start >= donor_blocks) {
ext4_debug("ext4 move extent: orig start offset " ext4_debug("ext4 move extent: orig start offset "
"[%llu] should be less than donor file size " "[%llu] should be less than donor file blocks "
"[%lld] [ino:orig %lu, donor_inode %lu]\n", "[%u] [ino:orig %lu, donor %lu]\n",
orig_start, donor_inode->i_size, orig_start, donor_blocks,
orig_inode->i_ino, donor_inode->i_ino); orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL; return -EINVAL;
} }
if (orig_start + *len > donor_inode->i_size) { /* TODO: eliminate this artificial restriction */
if (orig_start + *len > donor_blocks) {
ext4_debug("ext4 move extent: End offset [%llu] should " ext4_debug("ext4 move extent: End offset [%llu] should "
"be less than donor file size [%lld]." "be less than donor file blocks [%u]."
"So adjust length from %llu to %lld " "So adjust length from %llu to %llu "
"[ino:orig %lu, donor %lu]\n", "[ino:orig %lu, donor %lu]\n",
orig_start + *len, donor_inode->i_size, orig_start + *len, donor_blocks,
*len, donor_inode->i_size - orig_start, *len, donor_blocks - orig_start,
orig_inode->i_ino, donor_inode->i_ino); orig_inode->i_ino, donor_inode->i_ino);
*len = donor_inode->i_size - orig_start; *len = donor_blocks - orig_start;
} }
} else { } else {
if (orig_start >= orig_inode->i_size) { orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
if (orig_start >= orig_blocks) {
ext4_debug("ext4 move extent: start offset [%llu] " ext4_debug("ext4 move extent: start offset [%llu] "
"should be less than original file size " "should be less than original file blocks "
"[%lld] [inode:orig %lu, donor %lu]\n", "[%u] [ino:orig %lu, donor %lu]\n",
orig_start, orig_inode->i_size, orig_start, orig_blocks,
orig_inode->i_ino, donor_inode->i_ino); orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL; return -EINVAL;
} }
if (orig_start + *len > orig_inode->i_size) { if (orig_start + *len > orig_blocks) {
ext4_debug("ext4 move extent: Adjust length " ext4_debug("ext4 move extent: Adjust length "
"from %llu to %lld. Because it should be " "from %llu to %llu. Because it should be "
"less than original file size " "less than original file blocks "
"[ino:orig %lu, donor %lu]\n", "[ino:orig %lu, donor %lu]\n",
*len, orig_inode->i_size - orig_start, *len, orig_blocks - orig_start,
orig_inode->i_ino, donor_inode->i_ino); orig_inode->i_ino, donor_inode->i_ino);
*len = orig_inode->i_size - orig_start; *len = orig_blocks - orig_start;
} }
} }
@ -1027,18 +1111,23 @@ mext_check_arguments(struct inode *orig_inode,
* @inode1: the inode structure * @inode1: the inode structure
* @inode2: the inode structure * @inode2: the inode structure
* *
* Lock two inodes' i_mutex by i_ino order. This function is moved from * Lock two inodes' i_mutex by i_ino order.
* fs/inode.c. * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
*/ */
static void static int
mext_inode_double_lock(struct inode *inode1, struct inode *inode2) mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
{ {
if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { int ret = 0;
if (inode1)
mutex_lock(&inode1->i_mutex); BUG_ON(inode1 == NULL && inode2 == NULL);
else if (inode2)
mutex_lock(&inode2->i_mutex); ret = mext_check_null_inode(inode1, inode2, __func__);
return; if (ret < 0)
goto out;
if (inode1 == inode2) {
mutex_lock(&inode1->i_mutex);
goto out;
} }
if (inode1->i_ino < inode2->i_ino) { if (inode1->i_ino < inode2->i_ino) {
@ -1048,6 +1137,9 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
} }
out:
return ret;
} }
/** /**
@ -1056,17 +1148,28 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
* @inode1: the inode that is released first * @inode1: the inode that is released first
* @inode2: the inode that is released second * @inode2: the inode that is released second
* *
* This function is moved from fs/inode.c. * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
*/ */
static void static int
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
{ {
int ret = 0;
BUG_ON(inode1 == NULL && inode2 == NULL);
ret = mext_check_null_inode(inode1, inode2, __func__);
if (ret < 0)
goto out;
if (inode1) if (inode1)
mutex_unlock(&inode1->i_mutex); mutex_unlock(&inode1->i_mutex);
if (inode2 && inode2 != inode1) if (inode2 && inode2 != inode1)
mutex_unlock(&inode2->i_mutex); mutex_unlock(&inode2->i_mutex);
out:
return ret;
} }
/** /**
@ -1123,70 +1226,76 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
ext4_lblk_t rest_blocks; ext4_lblk_t rest_blocks;
pgoff_t orig_page_offset = 0, seq_end_page; pgoff_t orig_page_offset = 0, seq_end_page;
int ret, depth, last_extent = 0; int ret1, ret2, depth, last_extent = 0;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
int data_offset_in_page; int data_offset_in_page;
int block_len_in_page; int block_len_in_page;
int uninit; int uninit;
/* protect orig and donor against a truncate */ /* protect orig and donor against a truncate */
mext_inode_double_lock(orig_inode, donor_inode); ret1 = mext_inode_double_lock(orig_inode, donor_inode);
if (ret1 < 0)
return ret1;
mext_double_down_read(orig_inode, donor_inode); mext_double_down_read(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */ /* Check the filesystem environment whether move_extent can be done */
ret = mext_check_arguments(orig_inode, donor_inode, orig_start, ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
donor_start, &len, *moved_len); donor_start, &len, *moved_len);
mext_double_up_read(orig_inode, donor_inode); mext_double_up_read(orig_inode, donor_inode);
if (ret) if (ret1)
goto out2; goto out;
file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
block_end = block_start + len - 1; block_end = block_start + len - 1;
if (file_end < block_end) if (file_end < block_end)
len -= block_end - file_end; len -= block_end - file_end;
get_ext_path(orig_path, orig_inode, block_start, ret); ret1 = get_ext_path(orig_inode, block_start, &orig_path);
if (orig_path == NULL) if (ret1)
goto out2; goto out;
/* Get path structure to check the hole */ /* Get path structure to check the hole */
get_ext_path(holecheck_path, orig_inode, block_start, ret); ret1 = get_ext_path(orig_inode, block_start, &holecheck_path);
if (holecheck_path == NULL) if (ret1)
goto out; goto out;
depth = ext_depth(orig_inode); depth = ext_depth(orig_inode);
ext_cur = holecheck_path[depth].p_ext; ext_cur = holecheck_path[depth].p_ext;
if (ext_cur == NULL) {
ret = -EINVAL;
goto out;
}
/* /*
* Get proper extent whose ee_block is beyond block_start * Get proper starting location of block replacement if block_start was
* if block_start was within the hole. * within the hole.
*/ */
if (le32_to_cpu(ext_cur->ee_block) + if (le32_to_cpu(ext_cur->ee_block) +
ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
/*
* The hole exists between extents or the tail of
* original file.
*/
last_extent = mext_next_extent(orig_inode, last_extent = mext_next_extent(orig_inode,
holecheck_path, &ext_cur); holecheck_path, &ext_cur);
if (last_extent < 0) { if (last_extent < 0) {
ret = last_extent; ret1 = last_extent;
goto out; goto out;
} }
last_extent = mext_next_extent(orig_inode, orig_path, last_extent = mext_next_extent(orig_inode, orig_path,
&ext_dummy); &ext_dummy);
if (last_extent < 0) { if (last_extent < 0) {
ret = last_extent; ret1 = last_extent;
goto out; goto out;
} }
} seq_start = le32_to_cpu(ext_cur->ee_block);
seq_start = block_start; } else if (le32_to_cpu(ext_cur->ee_block) > block_start)
/* The hole exists at the beginning of original file. */
seq_start = le32_to_cpu(ext_cur->ee_block);
else
seq_start = block_start;
/* No blocks within the specified range. */ /* No blocks within the specified range. */
if (le32_to_cpu(ext_cur->ee_block) > block_end) { if (le32_to_cpu(ext_cur->ee_block) > block_end) {
ext4_debug("ext4 move extent: The specified range of file " ext4_debug("ext4 move extent: The specified range of file "
"may be the hole\n"); "may be the hole\n");
ret = -EINVAL; ret1 = -EINVAL;
goto out; goto out;
} }
@ -1206,7 +1315,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
last_extent = mext_next_extent(orig_inode, holecheck_path, last_extent = mext_next_extent(orig_inode, holecheck_path,
&ext_cur); &ext_cur);
if (last_extent < 0) { if (last_extent < 0) {
ret = last_extent; ret1 = last_extent;
break; break;
} }
add_blocks = ext4_ext_get_actual_len(ext_cur); add_blocks = ext4_ext_get_actual_len(ext_cur);
@ -1258,16 +1367,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
while (orig_page_offset <= seq_end_page) { while (orig_page_offset <= seq_end_page) {
/* Swap original branches with new branches */ /* Swap original branches with new branches */
ret = move_extent_par_page(o_filp, donor_inode, ret1 = move_extent_per_page(o_filp, donor_inode,
orig_page_offset, orig_page_offset,
data_offset_in_page, data_offset_in_page,
block_len_in_page, uninit); block_len_in_page, uninit);
if (ret < 0) if (ret1 < 0)
goto out; goto out;
orig_page_offset++; orig_page_offset++;
/* Count how many blocks we have exchanged */ /* Count how many blocks we have exchanged */
*moved_len += block_len_in_page; *moved_len += block_len_in_page;
BUG_ON(*moved_len > len); if (*moved_len > len) {
ext4_error(orig_inode->i_sb, __func__,
"We replaced blocks too much! "
"sum of replaced: %llu requested: %llu",
*moved_len, len);
ret1 = -EIO;
goto out;
}
data_offset_in_page = 0; data_offset_in_page = 0;
rest_blocks -= block_len_in_page; rest_blocks -= block_len_in_page;
@ -1280,17 +1396,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
/* Decrease buffer counter */ /* Decrease buffer counter */
if (holecheck_path) if (holecheck_path)
ext4_ext_drop_refs(holecheck_path); ext4_ext_drop_refs(holecheck_path);
get_ext_path(holecheck_path, orig_inode, ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path);
seq_start, ret); if (ret1)
if (holecheck_path == NULL)
break; break;
depth = holecheck_path->p_depth; depth = holecheck_path->p_depth;
/* Decrease buffer counter */ /* Decrease buffer counter */
if (orig_path) if (orig_path)
ext4_ext_drop_refs(orig_path); ext4_ext_drop_refs(orig_path);
get_ext_path(orig_path, orig_inode, seq_start, ret); ret1 = get_ext_path(orig_inode, seq_start, &orig_path);
if (orig_path == NULL) if (ret1)
break; break;
ext_cur = holecheck_path[depth].p_ext; ext_cur = holecheck_path[depth].p_ext;
@ -1307,14 +1422,13 @@ out:
ext4_ext_drop_refs(holecheck_path); ext4_ext_drop_refs(holecheck_path);
kfree(holecheck_path); kfree(holecheck_path);
} }
out2:
mext_inode_double_unlock(orig_inode, donor_inode);
if (ret) ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
return ret;
/* All of the specified blocks must be exchanged in succeed */ if (ret1)
BUG_ON(*moved_len != len); return ret1;
else if (ret2)
return ret2;
return 0; return 0;
} }

View File

@ -1518,8 +1518,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval; return retval;
if (blocks == 1 && !dx_fallback && if (blocks == 1 && !dx_fallback &&
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
return make_indexed_dir(handle, dentry, inode, bh); retval = make_indexed_dir(handle, dentry, inode, bh);
if (retval == -ENOSPC)
brelse(bh);
return retval;
}
brelse(bh); brelse(bh);
} }
bh = ext4_append(handle, dir, &block, &retval); bh = ext4_append(handle, dir, &block, &retval);
@ -1528,7 +1532,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *) bh->b_data; de = (struct ext4_dir_entry_2 *) bh->b_data;
de->inode = 0; de->inode = 0;
de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh); retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
if (retval == -ENOSPC)
brelse(bh);
return retval;
} }
/* /*
@ -1590,9 +1597,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto cleanup; goto cleanup;
node2 = (struct dx_node *)(bh2->b_data); node2 = (struct dx_node *)(bh2->b_data);
entries2 = node2->entries; entries2 = node2->entries;
memset(&node2->fake, 0, sizeof(struct fake_dirent));
node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize, node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
sb->s_blocksize); sb->s_blocksize);
node2->fake.inode = 0;
BUFFER_TRACE(frame->bh, "get_write_access"); BUFFER_TRACE(frame->bh, "get_write_access");
err = ext4_journal_get_write_access(handle, frame->bh); err = ext4_journal_get_write_access(handle, frame->bh);
if (err) if (err)
@ -1657,7 +1664,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (!de) if (!de)
goto cleanup; goto cleanup;
err = add_dirent_to_buf(handle, dentry, inode, de, bh); err = add_dirent_to_buf(handle, dentry, inode, de, bh);
bh = NULL; if (err != -ENOSPC)
bh = NULL;
goto cleanup; goto cleanup;
journal_error: journal_error:
@ -2310,7 +2318,7 @@ static int ext4_link(struct dentry *old_dentry,
struct inode *inode = old_dentry->d_inode; struct inode *inode = old_dentry->d_inode;
int err, retries = 0; int err, retries = 0;
if (EXT4_DIR_LINK_MAX(inode)) if (inode->i_nlink >= EXT4_LINK_MAX)
return -EMLINK; return -EMLINK;
/* /*
@ -2413,7 +2421,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename; goto end_rename;
retval = -EMLINK; retval = -EMLINK;
if (!new_inode && new_dir != old_dir && if (!new_inode && new_dir != old_dir &&
new_dir->i_nlink >= EXT4_LINK_MAX) EXT4_DIR_LINK_MAX(new_dir))
goto end_rename; goto end_rename;
} }
if (!new_bh) { if (!new_bh) {

View File

@ -746,7 +746,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
struct inode *inode = NULL; struct inode *inode = NULL;
handle_t *handle; handle_t *handle;
int gdb_off, gdb_num; int gdb_off, gdb_num;
int num_grp_locked = 0;
int err, err2; int err, err2;
gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
@ -856,7 +855,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
* using the new disk blocks. * using the new disk blocks.
*/ */
num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
/* Update group descriptor block for new group */ /* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)((char *)primary->b_data + gdp = (struct ext4_group_desc *)((char *)primary->b_data +
gdb_off * EXT4_DESC_SIZE(sb)); gdb_off * EXT4_DESC_SIZE(sb));
@ -875,10 +873,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
* descriptor * descriptor
*/ */
err = ext4_mb_add_groupinfo(sb, input->group, gdp); err = ext4_mb_add_groupinfo(sb, input->group, gdp);
if (err) { if (err)
ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
goto exit_journal; goto exit_journal;
}
/* /*
* Make the new blocks and inodes valid next. We do this before * Make the new blocks and inodes valid next. We do this before
@ -920,7 +916,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* Update the global fs size fields */ /* Update the global fs size fields */
sbi->s_groups_count++; sbi->s_groups_count++;
ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
ext4_handle_dirty_metadata(handle, NULL, primary); ext4_handle_dirty_metadata(handle, NULL, primary);

View File

@ -45,6 +45,7 @@
#include "ext4_jbd2.h" #include "ext4_jbd2.h"
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
#include "mballoc.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/ext4.h> #include <trace/events/ext4.h>
@ -344,7 +345,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
errstr = "Out of memory"; errstr = "Out of memory";
break; break;
case -EROFS: case -EROFS:
if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) if (!sb || (EXT4_SB(sb)->s_journal &&
EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
errstr = "Journal has aborted"; errstr = "Journal has aborted";
else else
errstr = "Readonly filesystem"; errstr = "Readonly filesystem";
@ -1279,11 +1281,9 @@ static int parse_options(char *options, struct super_block *sb,
*journal_devnum = option; *journal_devnum = option;
break; break;
case Opt_journal_checksum: case Opt_journal_checksum:
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); break; /* Kept for backwards compatibility */
break;
case Opt_journal_async_commit: case Opt_journal_async_commit:
set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
break; break;
case Opt_noload: case Opt_noload:
set_opt(sbi->s_mount_opt, NOLOAD); set_opt(sbi->s_mount_opt, NOLOAD);
@ -1695,12 +1695,12 @@ static int ext4_fill_flex_info(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL); gdp = ext4_get_group_desc(sb, i, NULL);
flex_group = ext4_flex_group(sbi, i); flex_group = ext4_flex_group(sbi, i);
atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, atomic_add(ext4_free_inodes_count(sb, gdp),
ext4_free_inodes_count(sb, gdp)); &sbi->s_flex_groups[flex_group].free_inodes);
atomic_set(&sbi->s_flex_groups[flex_group].free_blocks, atomic_add(ext4_free_blks_count(sb, gdp),
ext4_free_blks_count(sb, gdp)); &sbi->s_flex_groups[flex_group].free_blocks);
atomic_set(&sbi->s_flex_groups[flex_group].used_dirs, atomic_add(ext4_used_dirs_count(sb, gdp),
ext4_used_dirs_count(sb, gdp)); &sbi->s_flex_groups[flex_group].used_dirs);
} }
return 1; return 1;
@ -2253,6 +2253,49 @@ static struct kobj_type ext4_ktype = {
.release = ext4_sb_release, .release = ext4_sb_release,
}; };
/*
 * ext4_feature_set_ok - decide whether the superblock's feature flags
 * allow the kind of mount being requested.
 *
 * @sb:       superblock whose feature words are examined
 * @readonly: non-zero when the caller is asking for a read-only mount
 *
 * Any unsupported INCOMPAT feature refuses the mount outright.  For a
 * read-write request the RO_COMPAT word must also contain nothing outside
 * the supported set, and HUGE_FILE is rejected when blkcnt_t is narrower
 * than u64.  Every refusal is reported through ext4_msg().
 *
 * Returns 1 if the filesystem may be mounted as requested, 0 if not.
 */
static int ext4_feature_set_ok(struct super_block *sb, int readonly)
{
	unsigned int unsupp;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
		/* Report only the incompat bits that are not supported */
		unsupp = le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
			~EXT4_FEATURE_INCOMPAT_SUPP;
		ext4_msg(sb, KERN_ERR,
			 "Couldn't mount because of "
			 "unsupported optional features (%x)",
			 unsupp);
		return 0;
	}

	/* The remaining checks only restrict read-write mounts */
	if (!readonly) {
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
					       ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
			/* Report only the ro_compat bits that are not supported */
			unsupp = le32_to_cpu(
					EXT4_SB(sb)->s_es->s_feature_ro_compat) &
				~EXT4_FEATURE_RO_COMPAT_SUPP;
			ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
				 "unsupported optional features (%x)",
				 unsupp);
			return 0;
		}

		/*
		 * A filesystem with the huge-file feature can only be mounted
		 * read-write on 32-bit systems if the kernel is built with
		 * CONFIG_LBDAF, i.e. when blkcnt_t is at least as wide as u64.
		 */
		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
					       EXT4_FEATURE_RO_COMPAT_HUGE_FILE) &&
		    sizeof(blkcnt_t) < sizeof(u64)) {
			ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
				 "cannot be mounted RDWR without "
				 "CONFIG_LBDAF");
			return 0;
		}
	}

	return 1;
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent) static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock) __releases(kernel_lock)
__acquires(kernel_lock) __acquires(kernel_lock)
@ -2274,7 +2317,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
unsigned int db_count; unsigned int db_count;
unsigned int i; unsigned int i;
int needs_recovery, has_huge_files; int needs_recovery, has_huge_files;
int features;
__u64 blocks_count; __u64 blocks_count;
int err; int err;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@ -2401,39 +2443,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* previously didn't change the revision level when setting the flags, * previously didn't change the revision level when setting the flags,
* so there is a chance incompat flags are set on a rev 0 filesystem. * so there is a chance incompat flags are set on a rev 0 filesystem.
*/ */
features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
if (features) {
ext4_msg(sb, KERN_ERR,
"Couldn't mount because of "
"unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
~EXT4_FEATURE_INCOMPAT_SUPP));
goto failed_mount; goto failed_mount;
}
features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
if (!(sb->s_flags & MS_RDONLY) && features) {
ext4_msg(sb, KERN_ERR,
"Couldn't mount RDWR because of "
"unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
~EXT4_FEATURE_RO_COMPAT_SUPP));
goto failed_mount;
}
has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
if (has_huge_files) {
/*
* Large file size enabled file system can only be
* mount if kernel is build with CONFIG_LBDAF
*/
if (sizeof(root->i_blocks) < sizeof(u64) &&
!(sb->s_flags & MS_RDONLY)) {
ext4_msg(sb, KERN_ERR, "Filesystem with huge "
"files cannot be mounted read-write "
"without CONFIG_LBDAF");
goto failed_mount;
}
}
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE || if (blocksize < EXT4_MIN_BLOCK_SIZE ||
@ -2469,6 +2481,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
} }
} }
has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
has_huge_files); has_huge_files);
sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
@ -2549,12 +2563,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount; goto failed_mount;
} }
if (ext4_blocks_count(es) > /*
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { * Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
if ((ext4_blocks_count(es) >
(sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
(ext4_blocks_count(es) >
(pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
ext4_msg(sb, KERN_ERR, "filesystem" ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely"); " too large to mount safely on this system");
if (sizeof(sector_t) < 8) if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
ret = -EFBIG;
goto failed_mount; goto failed_mount;
} }
@ -2595,6 +2616,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount; goto failed_mount;
} }
sbi->s_groups_count = blocks_count; sbi->s_groups_count = blocks_count;
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb); EXT4_DESC_PER_BLOCK(sb);
sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
@ -2729,20 +2752,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount4; goto failed_mount4;
} }
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { jbd2_journal_set_features(sbi->s_journal,
jbd2_journal_set_features(sbi->s_journal, JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
JBD2_FEATURE_COMPAT_CHECKSUM, 0, if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
jbd2_journal_set_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else if (test_opt(sb, JOURNAL_CHECKSUM)) { else
jbd2_journal_set_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
jbd2_journal_clear_features(sbi->s_journal, 0, 0, jbd2_journal_clear_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else {
jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
}
/* We have now updated the journal if required, so we can /* We have now updated the journal if required, so we can
* validate the data journaling mode. */ * validate the data journaling mode. */
@ -3208,7 +3225,18 @@ static int ext4_commit_super(struct super_block *sb, int sync)
clear_buffer_write_io_error(sbh); clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh); set_buffer_uptodate(sbh);
} }
es->s_wtime = cpu_to_le32(get_seconds()); /*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
* write time when we are mounting the root file system
* read/only but we need to replay the journal; at that point,
* for people who are east of GMT and who make their clock
* tick in localtime for Windows bug-for-bug compatibility,
* the clock is set in the future, and this will cause e2fsck
* to complain and force a full file system check.
*/
if (!(sb->s_flags & MS_RDONLY))
es->s_wtime = cpu_to_le32(get_seconds());
es->s_kbytes_written = es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
@ -3477,18 +3505,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal) if (sbi->s_journal)
ext4_mark_recovery_complete(sb, es); ext4_mark_recovery_complete(sb, es);
} else { } else {
int ret; /* Make sure we can mount this feature set readwrite */
if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, if (!ext4_feature_set_ok(sb, 0)) {
~EXT4_FEATURE_RO_COMPAT_SUPP))) {
ext4_msg(sb, KERN_WARNING, "couldn't "
"remount RDWR because of unsupported "
"optional features (%x)",
(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
~EXT4_FEATURE_RO_COMPAT_SUPP));
err = -EROFS; err = -EROFS;
goto restore_opts; goto restore_opts;
} }
/* /*
* Make sure the group descriptor checksums * Make sure the group descriptor checksums
* are sane. If they aren't, refuse to remount r/w. * are sane. If they aren't, refuse to remount r/w.

View File

@ -810,12 +810,23 @@ inserted:
get_bh(new_bh); get_bh(new_bh);
} else { } else {
/* We need to allocate a new block */ /* We need to allocate a new block */
ext4_fsblk_t goal = ext4_group_first_block_no(sb, ext4_fsblk_t goal, block;
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group); EXT4_I(inode)->i_block_group);
ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode,
/* non-extent files can't have physical blocks past 2^32 */
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
block = ext4_new_meta_blocks(handle, inode,
goal, NULL, &error); goal, NULL, &error);
if (error) if (error)
goto cleanup; goto cleanup;
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
ea_idebug(inode, "creating block %d", block); ea_idebug(inode, "creating block %d", block);
new_bh = sb_getblk(sb, block); new_bh = sb_getblk(sb, block);

View File

@ -25,6 +25,7 @@
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/blkdev.h>
#include <trace/events/jbd2.h> #include <trace/events/jbd2.h>
/* /*
@ -133,8 +134,8 @@ static int journal_submit_commit_record(journal_t *journal,
bh->b_end_io = journal_end_buffer_io_sync; bh->b_end_io = journal_end_buffer_io_sync;
if (journal->j_flags & JBD2_BARRIER && if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_INCOMPAT_FEATURE(journal, !JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
set_buffer_ordered(bh); set_buffer_ordered(bh);
barrier_done = 1; barrier_done = 1;
} }
@ -706,11 +707,13 @@ start_journal_io:
/* Done it all: now write the commit record asynchronously. */ /* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction, err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum); &cbh, crc32_sum);
if (err) if (err)
__jbd2_journal_abort_hard(journal); __jbd2_journal_abort_hard(journal);
if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_dev, NULL);
} }
/* /*
@ -833,7 +836,7 @@ wait_for_iobuf:
jbd_debug(3, "JBD: commit phase 5\n"); jbd_debug(3, "JBD: commit phase 5\n");
if (!JBD2_HAS_INCOMPAT_FEATURE(journal, if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction, err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum); &cbh, crc32_sum);
if (err) if (err)

View File

@ -1187,6 +1187,12 @@ static int journal_reset(journal_t *journal)
first = be32_to_cpu(sb->s_first); first = be32_to_cpu(sb->s_first);
last = be32_to_cpu(sb->s_maxlen); last = be32_to_cpu(sb->s_maxlen);
if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n",
first, last);
journal_fail_superblock(journal);
return -EINVAL;
}
journal->j_first = first; journal->j_first = first;
journal->j_last = last; journal->j_last = last;

View File

@ -57,7 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
INIT_LIST_HEAD(&transaction->t_private_list); INIT_LIST_HEAD(&transaction->t_private_list);
/* Set up the commit timer for the new transaction. */ /* Set up the commit timer for the new transaction. */
journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
add_timer(&journal->j_commit_timer); add_timer(&journal->j_commit_timer);
J_ASSERT(journal->j_running_transaction == NULL); J_ASSERT(journal->j_running_transaction == NULL);
@ -238,6 +238,8 @@ repeat_locked:
__jbd2_log_space_left(journal)); __jbd2_log_space_left(journal));
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map);
out: out:
if (unlikely(new_transaction)) /* It's usually NULL */ if (unlikely(new_transaction)) /* It's usually NULL */
kfree(new_transaction); kfree(new_transaction);
@ -303,8 +305,6 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
handle = ERR_PTR(err); handle = ERR_PTR(err);
goto out; goto out;
} }
lock_map_acquire(&handle->h_lockdep_map);
out: out:
return handle; return handle;
} }
@ -426,6 +426,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks; handle->h_buffer_credits = nblocks;
ret = start_this_handle(journal, handle); ret = start_this_handle(journal, handle);
return ret; return ret;

View File

@ -652,7 +652,7 @@ struct transaction_s
* This transaction is being forced and some process is * This transaction is being forced and some process is
* waiting for it to finish. * waiting for it to finish.
*/ */
int t_synchronous_commit:1; unsigned int t_synchronous_commit:1;
/* /*
* For use by the filesystem to store fs-specific data * For use by the filesystem to store fs-specific data

View File

@ -5,10 +5,15 @@
#define _TRACE_EXT4_H #define _TRACE_EXT4_H
#include <linux/writeback.h> #include <linux/writeback.h>
#include "../../../fs/ext4/ext4.h"
#include "../../../fs/ext4/mballoc.h"
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
struct ext4_allocation_context;
struct ext4_allocation_request;
struct ext4_prealloc_space;
struct ext4_inode_info;
#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
TRACE_EVENT(ext4_free_inode, TRACE_EVENT(ext4_free_inode,
TP_PROTO(struct inode *inode), TP_PROTO(struct inode *inode),
@ -33,8 +38,8 @@ TRACE_EVENT(ext4_free_inode,
), ),
TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->uid, __entry->gid, __entry->mode, __entry->uid, __entry->gid,
(unsigned long long) __entry->blocks) (unsigned long long) __entry->blocks)
); );
@ -56,7 +61,8 @@ TRACE_EVENT(ext4_request_inode,
), ),
TP_printk("dev %s dir %lu mode %d", TP_printk("dev %s dir %lu mode %d",
jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode) jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir,
__entry->mode)
); );
TRACE_EVENT(ext4_allocate_inode, TRACE_EVENT(ext4_allocate_inode,
@ -79,7 +85,8 @@ TRACE_EVENT(ext4_allocate_inode,
), ),
TP_printk("dev %s ino %lu dir %lu mode %d", TP_printk("dev %s ino %lu dir %lu mode %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode) jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
(unsigned long) __entry->dir, __entry->mode)
); );
TRACE_EVENT(ext4_write_begin, TRACE_EVENT(ext4_write_begin,
@ -106,8 +113,8 @@ TRACE_EVENT(ext4_write_begin,
), ),
TP_printk("dev %s ino %lu pos %llu len %u flags %u", TP_printk("dev %s ino %lu pos %llu len %u flags %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->flags) __entry->pos, __entry->len, __entry->flags)
); );
TRACE_EVENT(ext4_ordered_write_end, TRACE_EVENT(ext4_ordered_write_end,
@ -133,8 +140,8 @@ TRACE_EVENT(ext4_ordered_write_end,
), ),
TP_printk("dev %s ino %lu pos %llu len %u copied %u", TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->copied) __entry->pos, __entry->len, __entry->copied)
); );
TRACE_EVENT(ext4_writeback_write_end, TRACE_EVENT(ext4_writeback_write_end,
@ -160,8 +167,8 @@ TRACE_EVENT(ext4_writeback_write_end,
), ),
TP_printk("dev %s ino %lu pos %llu len %u copied %u", TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->copied) __entry->pos, __entry->len, __entry->copied)
); );
TRACE_EVENT(ext4_journalled_write_end, TRACE_EVENT(ext4_journalled_write_end,
@ -186,8 +193,8 @@ TRACE_EVENT(ext4_journalled_write_end,
), ),
TP_printk("dev %s ino %lu pos %llu len %u copied %u", TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->copied) __entry->pos, __entry->len, __entry->copied)
); );
TRACE_EVENT(ext4_writepage, TRACE_EVENT(ext4_writepage,
@ -209,7 +216,8 @@ TRACE_EVENT(ext4_writepage,
), ),
TP_printk("dev %s ino %lu page_index %lu", TP_printk("dev %s ino %lu page_index %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->index)
); );
TRACE_EVENT(ext4_da_writepages, TRACE_EVENT(ext4_da_writepages,
@ -243,14 +251,49 @@ TRACE_EVENT(ext4_da_writepages,
__entry->range_cyclic = wbc->range_cyclic; __entry->range_cyclic = wbc->range_cyclic;
), ),
TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, jbd2_dev_to_name(__entry->dev),
(unsigned long) __entry->ino, __entry->nr_to_write,
__entry->pages_skipped, __entry->range_start, __entry->pages_skipped, __entry->range_start,
__entry->range_end, __entry->nonblocking, __entry->range_end, __entry->nonblocking,
__entry->for_kupdate, __entry->for_reclaim, __entry->for_kupdate, __entry->for_reclaim,
__entry->range_cyclic) __entry->range_cyclic)
); );
/*
 * ext4_da_write_pages tracepoint.
 *
 * Takes an inode and a struct mpage_da_data and records the inode's
 * device and inode number together with a copy of the mpd fields
 * b_blocknr, b_size, b_state, first_page, io_done and pages_written.
 */
TRACE_EVENT(ext4_da_write_pages,
TP_PROTO(struct inode *inode, struct mpage_da_data *mpd),
TP_ARGS(inode, mpd),
/* Layout of the recorded event entry. */
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, b_blocknr )
__field( __u32, b_size )
__field( __u32, b_state )
__field( unsigned long, first_page )
__field( int, io_done )
__field( int, pages_written )
),
/* Copy the values out of the inode and mpd into the entry. */
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->b_blocknr = mpd->b_blocknr;
__entry->b_size = mpd->b_size;
__entry->b_state = mpd->b_state;
__entry->first_page = mpd->first_page;
__entry->io_done = mpd->io_done;
__entry->pages_written = mpd->pages_written;
),
/* ino is cast to unsigned long so it matches the %lu conversion. */
TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->b_blocknr, __entry->b_size,
__entry->b_state, __entry->first_page,
__entry->io_done, __entry->pages_written)
);
TRACE_EVENT(ext4_da_writepages_result, TRACE_EVENT(ext4_da_writepages_result,
TP_PROTO(struct inode *inode, struct writeback_control *wbc, TP_PROTO(struct inode *inode, struct writeback_control *wbc,
int ret, int pages_written), int ret, int pages_written),
@ -280,7 +323,8 @@ TRACE_EVENT(ext4_da_writepages_result,
), ),
TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d", TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret, jbd2_dev_to_name(__entry->dev),
(unsigned long) __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped, __entry->pages_written, __entry->pages_skipped,
__entry->encountered_congestion, __entry->more_io, __entry->encountered_congestion, __entry->more_io,
__entry->no_nrwrite_index_update) __entry->no_nrwrite_index_update)
@ -309,8 +353,8 @@ TRACE_EVENT(ext4_da_write_begin,
), ),
TP_printk("dev %s ino %lu pos %llu len %u flags %u", TP_printk("dev %s ino %lu pos %llu len %u flags %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->flags) __entry->pos, __entry->len, __entry->flags)
); );
TRACE_EVENT(ext4_da_write_end, TRACE_EVENT(ext4_da_write_end,
@ -336,8 +380,8 @@ TRACE_EVENT(ext4_da_write_end,
), ),
TP_printk("dev %s ino %lu pos %llu len %u copied %u", TP_printk("dev %s ino %lu pos %llu len %u copied %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->copied) __entry->pos, __entry->len, __entry->copied)
); );
TRACE_EVENT(ext4_discard_blocks, TRACE_EVENT(ext4_discard_blocks,
@ -387,8 +431,8 @@ TRACE_EVENT(ext4_mb_new_inode_pa,
), ),
TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->pa_len, __entry->pa_lstart) __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
); );
TRACE_EVENT(ext4_mb_new_group_pa, TRACE_EVENT(ext4_mb_new_group_pa,
@ -415,8 +459,8 @@ TRACE_EVENT(ext4_mb_new_group_pa,
), ),
TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->pa_len, __entry->pa_lstart) __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
); );
TRACE_EVENT(ext4_mb_release_inode_pa, TRACE_EVENT(ext4_mb_release_inode_pa,
@ -442,8 +486,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
), ),
TP_printk("dev %s ino %lu block %llu count %u", TP_printk("dev %s ino %lu block %llu count %u",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->count) __entry->block, __entry->count)
); );
TRACE_EVENT(ext4_mb_release_group_pa, TRACE_EVENT(ext4_mb_release_group_pa,
@ -488,7 +532,7 @@ TRACE_EVENT(ext4_discard_preallocations,
), ),
TP_printk("dev %s ino %lu", TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino) jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
); );
TRACE_EVENT(ext4_mb_discard_preallocations, TRACE_EVENT(ext4_mb_discard_preallocations,
@ -543,8 +587,8 @@ TRACE_EVENT(ext4_request_blocks,
), ),
TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->len, __entry->flags, __entry->len,
(unsigned long long) __entry->logical, (unsigned long long) __entry->logical,
(unsigned long long) __entry->goal, (unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft, (unsigned long long) __entry->lleft,
@ -587,8 +631,8 @@ TRACE_EVENT(ext4_allocate_blocks,
), ),
TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->len, __entry->block, __entry->flags, __entry->len, __entry->block,
(unsigned long long) __entry->logical, (unsigned long long) __entry->logical,
(unsigned long long) __entry->goal, (unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft, (unsigned long long) __entry->lleft,
@ -621,8 +665,8 @@ TRACE_EVENT(ext4_free_blocks,
), ),
TP_printk("dev %s ino %lu block %llu count %lu metadata %d", TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->count, __entry->metadata) __entry->block, __entry->count, __entry->metadata)
); );
TRACE_EVENT(ext4_sync_file, TRACE_EVENT(ext4_sync_file,
@ -645,8 +689,8 @@ TRACE_EVENT(ext4_sync_file,
), ),
TP_printk("dev %s ino %ld parent %ld datasync %d ", TP_printk("dev %s ino %ld parent %ld datasync %d ",
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent, jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->datasync) (unsigned long) __entry->parent, __entry->datasync)
); );
TRACE_EVENT(ext4_sync_fs, TRACE_EVENT(ext4_sync_fs,
@ -669,6 +713,30 @@ TRACE_EVENT(ext4_sync_fs,
__entry->wait) __entry->wait)
); );
/*
 * ext4_alloc_da_blocks tracepoint.
 *
 * Takes an inode and records its device, inode number, and the
 * i_reserved_data_blocks / i_reserved_meta_blocks values read from the
 * ext4 inode info obtained via EXT4_I(inode).
 */
TRACE_EVENT(ext4_alloc_da_blocks,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
/* Layout of the recorded event entry. */
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, data_blocks )
__field( unsigned int, meta_blocks )
),
/* Copy the values out of the inode into the entry. */
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
/* ino is cast to unsigned long so it matches the %lu conversion. */
TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->data_blocks, __entry->meta_blocks)
);
#endif /* _TRACE_EXT4_H */ #endif /* _TRACE_EXT4_H */
/* This part must be outside protection */ /* This part must be outside protection */

View File

@ -159,7 +159,7 @@ TRACE_EVENT(jbd2_submit_inode_data,
), ),
TP_printk("dev %s ino %lu", TP_printk("dev %s ino %lu",
jbd2_dev_to_name(__entry->dev), __entry->ino) jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
); );
#endif /* _TRACE_JBD2_H */ #endif /* _TRACE_JBD2_H */