Various cleanups and bug fixes in ext4's extent status tree,
journalling, and block allocator subsystems. Also improve performance for parallel DIO overwrites. -----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmSaIWAACgkQ8vlZVpUN gaODEAf9GLk68DvU9iOhgJ1p/lMIqtbY0vvB1aeiQg7Z99mk/Vc//R5qQvtO2oN5 9G4OMSGKoUO0x9OlvDIw6za1BsE1pGHyBLmei7PO1JpHop6b6hKj+WQVPWb43v15 TI0vIkWzwJI2eIxsTqvpMkgwZ3aNL9c52xFyjwk/6lAsw4y2wxEls/NZhhE2tAXF w/RFmI9RC/AZy1JX3VeruzeiSvAq+JAnsW8iNIoN5nBvWU7yXLA3b4mcoWWrCQ5E sKqOkhTeobhYsAie6dxGhri/JrL1HwPOpJ8SWWmrlLWXoMVx1rXxW3OnxIAEl9sz 05n7Z+6LvI6aEk+rnjCqt4Z1cpIIEA== =cAq/ -----END PGP SIGNATURE----- Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Pull ext4 updates from Ted Ts'o: "Various cleanups and bug fixes in ext4's extent status tree, journalling, and block allocator subsystems. Also improve performance for parallel DIO overwrites" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (55 commits) ext4: avoid updating the superblock on a r/o mount if not needed jbd2: skip reading super block if it has been verified ext4: fix to check return value of freeze_bdev() in ext4_shutdown() ext4: refactoring to use the unified helper ext4_quotas_off() ext4: turn quotas off if mount failed after enabling quotas ext4: update doc about journal superblock description ext4: add journal cycled recording support jbd2: continue to record log between each mount jbd2: remove j_format_version jbd2: factor out journal initialization from journal_get_superblock() jbd2: switch to check format version in superblock directly jbd2: remove unused feature macros ext4: ext4_put_super: Remove redundant checking for 'sbi->s_journal_bdev' ext4: Fix reusing stale buffer heads from last failed mounting ext4: allow concurrent unaligned dio overwrites ext4: clean up mballoc criteria comments ext4: make ext4_zeroout_es() return void ext4: make ext4_es_insert_extent() return void ext4: make ext4_es_insert_delayed_block() return void ext4: make 
ext4_es_remove_extent() return void ...
This commit is contained in:
commit
53ea167b21
|
@ -260,8 +260,13 @@ which is 1024 bytes long:
|
||||||
- s_num_fc_blocks
|
- s_num_fc_blocks
|
||||||
- Number of fast commit blocks in the journal.
|
- Number of fast commit blocks in the journal.
|
||||||
* - 0x58
|
* - 0x58
|
||||||
|
- __be32
|
||||||
|
- s_head
|
||||||
|
- Block number of the head (first unused block) of the journal, only
|
||||||
|
up-to-date when the journal is empty.
|
||||||
|
* - 0x5C
|
||||||
- __u32
|
- __u32
|
||||||
- s_padding[42]
|
- s_padding[40]
|
||||||
-
|
-
|
||||||
* - 0xFC
|
* - 0xFC
|
||||||
- __be32
|
- __be32
|
||||||
|
|
|
@ -127,6 +127,58 @@ enum SHIFT_DIRECTION {
|
||||||
SHIFT_RIGHT,
|
SHIFT_RIGHT,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For each criteria, mballoc has slightly different way of finding
|
||||||
|
* the required blocks and usually, higher the criteria the slower the
|
||||||
|
* allocation. We start at lower criteria and keep falling back to
|
||||||
|
* higher ones if we are not able to find any blocks. Lower (earlier)
|
||||||
|
* criteria are faster.
|
||||||
|
*/
|
||||||
|
enum criteria {
|
||||||
|
/*
|
||||||
|
* Used when number of blocks needed is a power of 2. This
|
||||||
|
* doesn't trigger any disk IO except prefetch and is the
|
||||||
|
* fastest criteria.
|
||||||
|
*/
|
||||||
|
CR_POWER2_ALIGNED,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tries to lookup in-memory data structures to find the most
|
||||||
|
* suitable group that satisfies goal request. No disk IO
|
||||||
|
* except block prefetch.
|
||||||
|
*/
|
||||||
|
CR_GOAL_LEN_FAST,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Same as CR_GOAL_LEN_FAST but is allowed to reduce the goal
|
||||||
|
* length to the best available length for faster allocation.
|
||||||
|
*/
|
||||||
|
CR_BEST_AVAIL_LEN,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reads each block group sequentially, performing disk IO if
|
||||||
|
* necessary, to find a suitable block group. Tries to
|
||||||
|
* allocate goal length but might trim the request if nothing
|
||||||
|
* is found after enough tries.
|
||||||
|
*/
|
||||||
|
CR_GOAL_LEN_SLOW,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finds the first free set of blocks and allocates
|
||||||
|
* those. This is only used in rare cases when
|
||||||
|
* CR_GOAL_LEN_SLOW also fails to allocate anything.
|
||||||
|
*/
|
||||||
|
CR_ANY_FREE,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of criteria defined.
|
||||||
|
*/
|
||||||
|
EXT4_MB_NUM_CRS
|
||||||
|
};
|
||||||
|
|
||||||
|
/* criteria below which we use fast block scanning and avoid unnecessary IO */
|
||||||
|
#define CR_FAST CR_GOAL_LEN_SLOW
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Flags used in mballoc's allocation_context flags field.
|
* Flags used in mballoc's allocation_context flags field.
|
||||||
*
|
*
|
||||||
|
@ -165,9 +217,12 @@ enum SHIFT_DIRECTION {
|
||||||
/* Do strict check for free blocks while retrying block allocation */
|
/* Do strict check for free blocks while retrying block allocation */
|
||||||
#define EXT4_MB_STRICT_CHECK 0x4000
|
#define EXT4_MB_STRICT_CHECK 0x4000
|
||||||
/* Large fragment size list lookup succeeded at least once for cr = 0 */
|
/* Large fragment size list lookup succeeded at least once for cr = 0 */
|
||||||
#define EXT4_MB_CR0_OPTIMIZED 0x8000
|
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
|
||||||
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
|
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
|
||||||
#define EXT4_MB_CR1_OPTIMIZED 0x00010000
|
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
|
||||||
|
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
|
||||||
|
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
|
||||||
|
|
||||||
struct ext4_allocation_request {
|
struct ext4_allocation_request {
|
||||||
/* target inode for block we're allocating */
|
/* target inode for block we're allocating */
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
|
@ -1532,21 +1587,25 @@ struct ext4_sb_info {
|
||||||
unsigned long s_mb_last_start;
|
unsigned long s_mb_last_start;
|
||||||
unsigned int s_mb_prefetch;
|
unsigned int s_mb_prefetch;
|
||||||
unsigned int s_mb_prefetch_limit;
|
unsigned int s_mb_prefetch_limit;
|
||||||
|
unsigned int s_mb_best_avail_max_trim_order;
|
||||||
|
|
||||||
/* stats for buddy allocator */
|
/* stats for buddy allocator */
|
||||||
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
|
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
|
||||||
atomic_t s_bal_success; /* we found long enough chunks */
|
atomic_t s_bal_success; /* we found long enough chunks */
|
||||||
atomic_t s_bal_allocated; /* in blocks */
|
atomic_t s_bal_allocated; /* in blocks */
|
||||||
atomic_t s_bal_ex_scanned; /* total extents scanned */
|
atomic_t s_bal_ex_scanned; /* total extents scanned */
|
||||||
|
atomic_t s_bal_cX_ex_scanned[EXT4_MB_NUM_CRS]; /* total extents scanned */
|
||||||
atomic_t s_bal_groups_scanned; /* number of groups scanned */
|
atomic_t s_bal_groups_scanned; /* number of groups scanned */
|
||||||
atomic_t s_bal_goals; /* goal hits */
|
atomic_t s_bal_goals; /* goal hits */
|
||||||
|
atomic_t s_bal_len_goals; /* len goal hits */
|
||||||
atomic_t s_bal_breaks; /* too long searches */
|
atomic_t s_bal_breaks; /* too long searches */
|
||||||
atomic_t s_bal_2orders; /* 2^order hits */
|
atomic_t s_bal_2orders; /* 2^order hits */
|
||||||
atomic_t s_bal_cr0_bad_suggestions;
|
atomic_t s_bal_p2_aligned_bad_suggestions;
|
||||||
atomic_t s_bal_cr1_bad_suggestions;
|
atomic_t s_bal_goal_fast_bad_suggestions;
|
||||||
atomic64_t s_bal_cX_groups_considered[4];
|
atomic_t s_bal_best_avail_bad_suggestions;
|
||||||
atomic64_t s_bal_cX_hits[4];
|
atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
|
||||||
atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */
|
atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
|
||||||
|
atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */
|
||||||
atomic_t s_mb_buddies_generated; /* number of buddies generated */
|
atomic_t s_mb_buddies_generated; /* number of buddies generated */
|
||||||
atomic64_t s_mb_generation_time;
|
atomic64_t s_mb_generation_time;
|
||||||
atomic_t s_mb_lost_chunks;
|
atomic_t s_mb_lost_chunks;
|
||||||
|
@ -2632,10 +2691,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb,
|
||||||
extern ext4_group_t ext4_get_group_number(struct super_block *sb,
|
extern ext4_group_t ext4_get_group_number(struct super_block *sb,
|
||||||
ext4_fsblk_t block);
|
ext4_fsblk_t block);
|
||||||
|
|
||||||
extern unsigned int ext4_block_group(struct super_block *sb,
|
|
||||||
ext4_fsblk_t blocknr);
|
|
||||||
extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
|
|
||||||
ext4_fsblk_t blocknr);
|
|
||||||
extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
|
extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
|
||||||
extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
|
extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
|
||||||
ext4_group_t group);
|
ext4_group_t group);
|
||||||
|
@ -2841,8 +2896,6 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
|
||||||
/* mballoc.c */
|
/* mballoc.c */
|
||||||
extern const struct seq_operations ext4_mb_seq_groups_ops;
|
extern const struct seq_operations ext4_mb_seq_groups_ops;
|
||||||
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
|
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
|
||||||
extern long ext4_mb_stats;
|
|
||||||
extern long ext4_mb_max_to_scan;
|
|
||||||
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
|
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
|
||||||
extern int ext4_mb_init(struct super_block *);
|
extern int ext4_mb_init(struct super_block *);
|
||||||
extern int ext4_mb_release(struct super_block *);
|
extern int ext4_mb_release(struct super_block *);
|
||||||
|
@ -3481,14 +3534,8 @@ extern int ext4_try_to_write_inline_data(struct address_space *mapping,
|
||||||
struct inode *inode,
|
struct inode *inode,
|
||||||
loff_t pos, unsigned len,
|
loff_t pos, unsigned len,
|
||||||
struct page **pagep);
|
struct page **pagep);
|
||||||
extern int ext4_write_inline_data_end(struct inode *inode,
|
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
|
||||||
loff_t pos, unsigned len,
|
unsigned copied, struct folio *folio);
|
||||||
unsigned copied,
|
|
||||||
struct page *page);
|
|
||||||
extern struct buffer_head *
|
|
||||||
ext4_journalled_write_inline_data(struct inode *inode,
|
|
||||||
unsigned len,
|
|
||||||
struct page *page);
|
|
||||||
extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
|
extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
|
||||||
struct inode *inode,
|
struct inode *inode,
|
||||||
loff_t pos, unsigned len,
|
loff_t pos, unsigned len,
|
||||||
|
|
|
@ -3123,7 +3123,7 @@ void ext4_ext_release(struct super_block *sb)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
|
static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
|
||||||
{
|
{
|
||||||
ext4_lblk_t ee_block;
|
ext4_lblk_t ee_block;
|
||||||
ext4_fsblk_t ee_pblock;
|
ext4_fsblk_t ee_pblock;
|
||||||
|
@ -3134,10 +3134,10 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
|
||||||
ee_pblock = ext4_ext_pblock(ex);
|
ee_pblock = ext4_ext_pblock(ex);
|
||||||
|
|
||||||
if (ee_len == 0)
|
if (ee_len == 0)
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
|
ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
|
||||||
EXTENT_STATUS_WRITTEN);
|
EXTENT_STATUS_WRITTEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FIXME!! we need to try to merge to left or right after zero-out */
|
/* FIXME!! we need to try to merge to left or right after zero-out */
|
||||||
|
@ -3287,7 +3287,7 @@ static int ext4_split_extent_at(handle_t *handle,
|
||||||
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
|
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
|
||||||
if (!err)
|
if (!err)
|
||||||
/* update extent status tree */
|
/* update extent status tree */
|
||||||
err = ext4_zeroout_es(inode, &zero_ex);
|
ext4_zeroout_es(inode, &zero_ex);
|
||||||
/* If we failed at this point, we don't know in which
|
/* If we failed at this point, we don't know in which
|
||||||
* state the extent tree exactly is so don't try to fix
|
* state the extent tree exactly is so don't try to fix
|
||||||
* length of the original extent as it may do even more
|
* length of the original extent as it may do even more
|
||||||
|
@ -3640,9 +3640,8 @@ fallback:
|
||||||
out:
|
out:
|
||||||
/* If we have gotten a failure, don't zero out status tree */
|
/* If we have gotten a failure, don't zero out status tree */
|
||||||
if (!err) {
|
if (!err) {
|
||||||
err = ext4_zeroout_es(inode, &zero_ex1);
|
ext4_zeroout_es(inode, &zero_ex1);
|
||||||
if (!err)
|
ext4_zeroout_es(inode, &zero_ex2);
|
||||||
err = ext4_zeroout_es(inode, &zero_ex2);
|
|
||||||
}
|
}
|
||||||
return err ? err : allocated;
|
return err ? err : allocated;
|
||||||
}
|
}
|
||||||
|
@ -4403,15 +4402,8 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode)
|
||||||
|
|
||||||
last_block = (inode->i_size + sb->s_blocksize - 1)
|
last_block = (inode->i_size + sb->s_blocksize - 1)
|
||||||
>> EXT4_BLOCK_SIZE_BITS(sb);
|
>> EXT4_BLOCK_SIZE_BITS(sb);
|
||||||
retry:
|
ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
|
||||||
err = ext4_es_remove_extent(inode, last_block,
|
|
||||||
EXT_MAX_BLOCKS - last_block);
|
|
||||||
if (err == -ENOMEM) {
|
|
||||||
memalloc_retry_wait(GFP_ATOMIC);
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
if (err)
|
|
||||||
return err;
|
|
||||||
retry_remove_space:
|
retry_remove_space:
|
||||||
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
|
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
|
||||||
if (err == -ENOMEM) {
|
if (err == -ENOMEM) {
|
||||||
|
@ -5363,13 +5355,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
|
||||||
|
|
||||||
down_write(&EXT4_I(inode)->i_data_sem);
|
down_write(&EXT4_I(inode)->i_data_sem);
|
||||||
ext4_discard_preallocations(inode, 0);
|
ext4_discard_preallocations(inode, 0);
|
||||||
|
ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
|
||||||
ret = ext4_es_remove_extent(inode, punch_start,
|
|
||||||
EXT_MAX_BLOCKS - punch_start);
|
|
||||||
if (ret) {
|
|
||||||
up_write(&EXT4_I(inode)->i_data_sem);
|
|
||||||
goto out_stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
|
ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -5547,12 +5533,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
|
||||||
ext4_free_ext_path(path);
|
ext4_free_ext_path(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = ext4_es_remove_extent(inode, offset_lblk,
|
ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);
|
||||||
EXT_MAX_BLOCKS - offset_lblk);
|
|
||||||
if (ret) {
|
|
||||||
up_write(&EXT4_I(inode)->i_data_sem);
|
|
||||||
goto out_stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if offset_lblk lies in a hole which is at start of file, use
|
* if offset_lblk lies in a hole which is at start of file, use
|
||||||
|
@ -5610,12 +5591,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
|
||||||
BUG_ON(!inode_is_locked(inode1));
|
BUG_ON(!inode_is_locked(inode1));
|
||||||
BUG_ON(!inode_is_locked(inode2));
|
BUG_ON(!inode_is_locked(inode2));
|
||||||
|
|
||||||
*erp = ext4_es_remove_extent(inode1, lblk1, count);
|
ext4_es_remove_extent(inode1, lblk1, count);
|
||||||
if (unlikely(*erp))
|
ext4_es_remove_extent(inode2, lblk2, count);
|
||||||
return 0;
|
|
||||||
*erp = ext4_es_remove_extent(inode2, lblk2, count);
|
|
||||||
if (unlikely(*erp))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
while (count) {
|
while (count) {
|
||||||
struct ext4_extent *ex1, *ex2, tmp_ex;
|
struct ext4_extent *ex1, *ex2, tmp_ex;
|
||||||
|
|
|
@ -144,9 +144,11 @@
|
||||||
static struct kmem_cache *ext4_es_cachep;
|
static struct kmem_cache *ext4_es_cachep;
|
||||||
static struct kmem_cache *ext4_pending_cachep;
|
static struct kmem_cache *ext4_pending_cachep;
|
||||||
|
|
||||||
static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
|
static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
|
||||||
|
struct extent_status *prealloc);
|
||||||
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t end, int *reserved);
|
ext4_lblk_t end, int *reserved,
|
||||||
|
struct extent_status *prealloc);
|
||||||
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
|
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
|
||||||
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||||
struct ext4_inode_info *locked_ei);
|
struct ext4_inode_info *locked_ei);
|
||||||
|
@ -446,22 +448,36 @@ static void ext4_es_list_del(struct inode *inode)
|
||||||
spin_unlock(&sbi->s_es_lock);
|
spin_unlock(&sbi->s_es_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct extent_status *
|
/*
|
||||||
ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
|
* Returns true if we cannot fail to allocate memory for this extent_status
|
||||||
ext4_fsblk_t pblk)
|
* entry and cannot reclaim it until its status changes.
|
||||||
|
*/
|
||||||
|
static inline bool ext4_es_must_keep(struct extent_status *es)
|
||||||
|
{
|
||||||
|
/* fiemap, bigalloc, and seek_data/hole need to use it. */
|
||||||
|
if (ext4_es_is_delayed(es))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct extent_status *__es_alloc_extent(bool nofail)
|
||||||
|
{
|
||||||
|
if (!nofail)
|
||||||
|
return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
|
||||||
|
|
||||||
|
return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ext4_es_init_extent(struct inode *inode, struct extent_status *es,
|
||||||
|
ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
|
||||||
{
|
{
|
||||||
struct extent_status *es;
|
|
||||||
es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
|
|
||||||
if (es == NULL)
|
|
||||||
return NULL;
|
|
||||||
es->es_lblk = lblk;
|
es->es_lblk = lblk;
|
||||||
es->es_len = len;
|
es->es_len = len;
|
||||||
es->es_pblk = pblk;
|
es->es_pblk = pblk;
|
||||||
|
|
||||||
/*
|
/* We never try to reclaim a must-keep extent, so we don't count it. */
|
||||||
* We don't count delayed extent because we never try to reclaim them
|
if (!ext4_es_must_keep(es)) {
|
||||||
*/
|
|
||||||
if (!ext4_es_is_delayed(es)) {
|
|
||||||
if (!EXT4_I(inode)->i_es_shk_nr++)
|
if (!EXT4_I(inode)->i_es_shk_nr++)
|
||||||
ext4_es_list_add(inode);
|
ext4_es_list_add(inode);
|
||||||
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
|
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
|
||||||
|
@ -470,8 +486,11 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
|
||||||
|
|
||||||
EXT4_I(inode)->i_es_all_nr++;
|
EXT4_I(inode)->i_es_all_nr++;
|
||||||
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
|
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
|
||||||
|
}
|
||||||
|
|
||||||
return es;
|
static inline void __es_free_extent(struct extent_status *es)
|
||||||
|
{
|
||||||
|
kmem_cache_free(ext4_es_cachep, es);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
||||||
|
@ -479,8 +498,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
||||||
EXT4_I(inode)->i_es_all_nr--;
|
EXT4_I(inode)->i_es_all_nr--;
|
||||||
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
|
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
|
||||||
|
|
||||||
/* Decrease the shrink counter when this es is not delayed */
|
/* Decrease the shrink counter when we can reclaim the extent. */
|
||||||
if (!ext4_es_is_delayed(es)) {
|
if (!ext4_es_must_keep(es)) {
|
||||||
BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
|
BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
|
||||||
if (!--EXT4_I(inode)->i_es_shk_nr)
|
if (!--EXT4_I(inode)->i_es_shk_nr)
|
||||||
ext4_es_list_del(inode);
|
ext4_es_list_del(inode);
|
||||||
|
@ -488,7 +507,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
|
||||||
s_es_stats.es_stats_shk_cnt);
|
s_es_stats.es_stats_shk_cnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
kmem_cache_free(ext4_es_cachep, es);
|
__es_free_extent(es);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -749,7 +768,8 @@ static inline void ext4_es_insert_extent_check(struct inode *inode,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
|
static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
|
||||||
|
struct extent_status *prealloc)
|
||||||
{
|
{
|
||||||
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
|
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
|
||||||
struct rb_node **p = &tree->root.rb_node;
|
struct rb_node **p = &tree->root.rb_node;
|
||||||
|
@ -789,10 +809,15 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
|
if (prealloc)
|
||||||
newes->es_pblk);
|
es = prealloc;
|
||||||
|
else
|
||||||
|
es = __es_alloc_extent(false);
|
||||||
if (!es)
|
if (!es)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len,
|
||||||
|
newes->es_pblk);
|
||||||
|
|
||||||
rb_link_node(&es->rb_node, parent, p);
|
rb_link_node(&es->rb_node, parent, p);
|
||||||
rb_insert_color(&es->rb_node, &tree->root);
|
rb_insert_color(&es->rb_node, &tree->root);
|
||||||
|
|
||||||
|
@ -804,26 +829,27 @@ out:
|
||||||
/*
|
/*
|
||||||
* ext4_es_insert_extent() adds information to an inode's extent
|
* ext4_es_insert_extent() adds information to an inode's extent
|
||||||
* status tree.
|
* status tree.
|
||||||
*
|
|
||||||
* Return 0 on success, error code on failure.
|
|
||||||
*/
|
*/
|
||||||
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t len, ext4_fsblk_t pblk,
|
ext4_lblk_t len, ext4_fsblk_t pblk,
|
||||||
unsigned int status)
|
unsigned int status)
|
||||||
{
|
{
|
||||||
struct extent_status newes;
|
struct extent_status newes;
|
||||||
ext4_lblk_t end = lblk + len - 1;
|
ext4_lblk_t end = lblk + len - 1;
|
||||||
int err = 0;
|
int err1 = 0;
|
||||||
|
int err2 = 0;
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
struct extent_status *es1 = NULL;
|
||||||
|
struct extent_status *es2 = NULL;
|
||||||
|
|
||||||
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
|
es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
|
||||||
lblk, len, pblk, status, inode->i_ino);
|
lblk, len, pblk, status, inode->i_ino);
|
||||||
|
|
||||||
if (!len)
|
if (!len)
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
BUG_ON(end < lblk);
|
BUG_ON(end < lblk);
|
||||||
|
|
||||||
|
@ -842,29 +868,40 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
|
|
||||||
ext4_es_insert_extent_check(inode, &newes);
|
ext4_es_insert_extent_check(inode, &newes);
|
||||||
|
|
||||||
write_lock(&EXT4_I(inode)->i_es_lock);
|
|
||||||
err = __es_remove_extent(inode, lblk, end, NULL);
|
|
||||||
if (err != 0)
|
|
||||||
goto error;
|
|
||||||
retry:
|
retry:
|
||||||
err = __es_insert_extent(inode, &newes);
|
if (err1 && !es1)
|
||||||
if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
|
es1 = __es_alloc_extent(true);
|
||||||
128, EXT4_I(inode)))
|
if ((err1 || err2) && !es2)
|
||||||
goto retry;
|
es2 = __es_alloc_extent(true);
|
||||||
if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
|
write_lock(&EXT4_I(inode)->i_es_lock);
|
||||||
err = 0;
|
|
||||||
|
err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
|
||||||
|
if (err1 != 0)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
err2 = __es_insert_extent(inode, &newes, es2);
|
||||||
|
if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
|
||||||
|
err2 = 0;
|
||||||
|
if (err2 != 0)
|
||||||
|
goto error;
|
||||||
|
|
||||||
if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
|
if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
|
||||||
(status & EXTENT_STATUS_WRITTEN ||
|
(status & EXTENT_STATUS_WRITTEN ||
|
||||||
status & EXTENT_STATUS_UNWRITTEN))
|
status & EXTENT_STATUS_UNWRITTEN))
|
||||||
__revise_pending(inode, lblk, len);
|
__revise_pending(inode, lblk, len);
|
||||||
|
|
||||||
|
/* es is pre-allocated but not used, free it. */
|
||||||
|
if (es1 && !es1->es_len)
|
||||||
|
__es_free_extent(es1);
|
||||||
|
if (es2 && !es2->es_len)
|
||||||
|
__es_free_extent(es2);
|
||||||
error:
|
error:
|
||||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||||
|
if (err1 || err2)
|
||||||
|
goto retry;
|
||||||
|
|
||||||
ext4_es_print_tree(inode);
|
ext4_es_print_tree(inode);
|
||||||
|
return;
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -897,7 +934,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
|
|
||||||
es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
|
es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
|
||||||
if (!es || es->es_lblk > end)
|
if (!es || es->es_lblk > end)
|
||||||
__es_insert_extent(inode, &newes);
|
__es_insert_extent(inode, &newes, NULL);
|
||||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1287,6 +1324,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||||
* @lblk - first block in range
|
* @lblk - first block in range
|
||||||
* @end - last block in range
|
* @end - last block in range
|
||||||
* @reserved - number of cluster reservations released
|
* @reserved - number of cluster reservations released
|
||||||
|
* @prealloc - pre-allocated es to avoid memory allocation failures
|
||||||
*
|
*
|
||||||
* If @reserved is not NULL and delayed allocation is enabled, counts
|
* If @reserved is not NULL and delayed allocation is enabled, counts
|
||||||
* block/cluster reservations freed by removing range and if bigalloc
|
* block/cluster reservations freed by removing range and if bigalloc
|
||||||
|
@ -1294,7 +1332,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
|
||||||
* error code on failure.
|
* error code on failure.
|
||||||
*/
|
*/
|
||||||
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t end, int *reserved)
|
ext4_lblk_t end, int *reserved,
|
||||||
|
struct extent_status *prealloc)
|
||||||
{
|
{
|
||||||
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
|
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
|
||||||
struct rb_node *node;
|
struct rb_node *node;
|
||||||
|
@ -1302,14 +1341,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
struct extent_status orig_es;
|
struct extent_status orig_es;
|
||||||
ext4_lblk_t len1, len2;
|
ext4_lblk_t len1, len2;
|
||||||
ext4_fsblk_t block;
|
ext4_fsblk_t block;
|
||||||
int err;
|
int err = 0;
|
||||||
bool count_reserved = true;
|
bool count_reserved = true;
|
||||||
struct rsvd_count rc;
|
struct rsvd_count rc;
|
||||||
|
|
||||||
if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
|
if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
|
||||||
count_reserved = false;
|
count_reserved = false;
|
||||||
retry:
|
|
||||||
err = 0;
|
|
||||||
|
|
||||||
es = __es_tree_search(&tree->root, lblk);
|
es = __es_tree_search(&tree->root, lblk);
|
||||||
if (!es)
|
if (!es)
|
||||||
|
@ -1343,14 +1380,13 @@ retry:
|
||||||
orig_es.es_len - len2;
|
orig_es.es_len - len2;
|
||||||
ext4_es_store_pblock_status(&newes, block,
|
ext4_es_store_pblock_status(&newes, block,
|
||||||
ext4_es_status(&orig_es));
|
ext4_es_status(&orig_es));
|
||||||
err = __es_insert_extent(inode, &newes);
|
err = __es_insert_extent(inode, &newes, prealloc);
|
||||||
if (err) {
|
if (err) {
|
||||||
|
if (!ext4_es_must_keep(&newes))
|
||||||
|
return 0;
|
||||||
|
|
||||||
es->es_lblk = orig_es.es_lblk;
|
es->es_lblk = orig_es.es_lblk;
|
||||||
es->es_len = orig_es.es_len;
|
es->es_len = orig_es.es_len;
|
||||||
if ((err == -ENOMEM) &&
|
|
||||||
__es_shrink(EXT4_SB(inode->i_sb),
|
|
||||||
128, EXT4_I(inode)))
|
|
||||||
goto retry;
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -1422,39 +1458,48 @@ out:
|
||||||
* @len - number of blocks to remove
|
* @len - number of blocks to remove
|
||||||
*
|
*
|
||||||
* Reduces block/cluster reservation count and for bigalloc cancels pending
|
* Reduces block/cluster reservation count and for bigalloc cancels pending
|
||||||
* reservations as needed. Returns 0 on success, error code on failure.
|
* reservations as needed.
|
||||||
*/
|
*/
|
||||||
int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t len)
|
ext4_lblk_t len)
|
||||||
{
|
{
|
||||||
ext4_lblk_t end;
|
ext4_lblk_t end;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
int reserved = 0;
|
int reserved = 0;
|
||||||
|
struct extent_status *es = NULL;
|
||||||
|
|
||||||
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
trace_ext4_es_remove_extent(inode, lblk, len);
|
trace_ext4_es_remove_extent(inode, lblk, len);
|
||||||
es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
|
es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
|
||||||
lblk, len, inode->i_ino);
|
lblk, len, inode->i_ino);
|
||||||
|
|
||||||
if (!len)
|
if (!len)
|
||||||
return err;
|
return;
|
||||||
|
|
||||||
end = lblk + len - 1;
|
end = lblk + len - 1;
|
||||||
BUG_ON(end < lblk);
|
BUG_ON(end < lblk);
|
||||||
|
|
||||||
|
retry:
|
||||||
|
if (err && !es)
|
||||||
|
es = __es_alloc_extent(true);
|
||||||
/*
|
/*
|
||||||
* ext4_clear_inode() depends on us taking i_es_lock unconditionally
|
* ext4_clear_inode() depends on us taking i_es_lock unconditionally
|
||||||
* so that we are sure __es_shrink() is done with the inode before it
|
* so that we are sure __es_shrink() is done with the inode before it
|
||||||
* is reclaimed.
|
* is reclaimed.
|
||||||
*/
|
*/
|
||||||
write_lock(&EXT4_I(inode)->i_es_lock);
|
write_lock(&EXT4_I(inode)->i_es_lock);
|
||||||
err = __es_remove_extent(inode, lblk, end, &reserved);
|
err = __es_remove_extent(inode, lblk, end, &reserved, es);
|
||||||
|
if (es && !es->es_len)
|
||||||
|
__es_free_extent(es);
|
||||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||||
|
if (err)
|
||||||
|
goto retry;
|
||||||
|
|
||||||
ext4_es_print_tree(inode);
|
ext4_es_print_tree(inode);
|
||||||
ext4_da_release_space(inode, reserved);
|
ext4_da_release_space(inode, reserved);
|
||||||
return err;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
|
||||||
|
@ -1702,11 +1747,8 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
|
||||||
|
|
||||||
(*nr_to_scan)--;
|
(*nr_to_scan)--;
|
||||||
node = rb_next(&es->rb_node);
|
node = rb_next(&es->rb_node);
|
||||||
/*
|
|
||||||
* We can't reclaim delayed extent from status tree because
|
if (ext4_es_must_keep(es))
|
||||||
* fiemap, bigallic, and seek_data/hole need to use it.
|
|
||||||
*/
|
|
||||||
if (ext4_es_is_delayed(es))
|
|
||||||
goto next;
|
goto next;
|
||||||
if (ext4_es_is_referenced(es)) {
|
if (ext4_es_is_referenced(es)) {
|
||||||
ext4_es_clear_referenced(es);
|
ext4_es_clear_referenced(es);
|
||||||
|
@ -1770,7 +1812,7 @@ void ext4_clear_inode_es(struct inode *inode)
|
||||||
while (node) {
|
while (node) {
|
||||||
es = rb_entry(node, struct extent_status, rb_node);
|
es = rb_entry(node, struct extent_status, rb_node);
|
||||||
node = rb_next(node);
|
node = rb_next(node);
|
||||||
if (!ext4_es_is_delayed(es)) {
|
if (!ext4_es_must_keep(es)) {
|
||||||
rb_erase(&es->rb_node, &tree->root);
|
rb_erase(&es->rb_node, &tree->root);
|
||||||
ext4_es_free_extent(inode, es);
|
ext4_es_free_extent(inode, es);
|
||||||
}
|
}
|
||||||
|
@ -1972,17 +2014,18 @@ bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
|
||||||
* @lblk - logical block to be added
|
* @lblk - logical block to be added
|
||||||
* @allocated - indicates whether a physical cluster has been allocated for
|
* @allocated - indicates whether a physical cluster has been allocated for
|
||||||
* the logical cluster that contains the block
|
* the logical cluster that contains the block
|
||||||
*
|
|
||||||
* Returns 0 on success, negative error code on failure.
|
|
||||||
*/
|
*/
|
||||||
int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
|
void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
|
||||||
bool allocated)
|
bool allocated)
|
||||||
{
|
{
|
||||||
struct extent_status newes;
|
struct extent_status newes;
|
||||||
int err = 0;
|
int err1 = 0;
|
||||||
|
int err2 = 0;
|
||||||
|
struct extent_status *es1 = NULL;
|
||||||
|
struct extent_status *es2 = NULL;
|
||||||
|
|
||||||
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
|
es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
|
||||||
lblk, inode->i_ino);
|
lblk, inode->i_ino);
|
||||||
|
@ -1994,29 +2037,37 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
|
||||||
|
|
||||||
ext4_es_insert_extent_check(inode, &newes);
|
ext4_es_insert_extent_check(inode, &newes);
|
||||||
|
|
||||||
|
retry:
|
||||||
|
if (err1 && !es1)
|
||||||
|
es1 = __es_alloc_extent(true);
|
||||||
|
if ((err1 || err2) && !es2)
|
||||||
|
es2 = __es_alloc_extent(true);
|
||||||
write_lock(&EXT4_I(inode)->i_es_lock);
|
write_lock(&EXT4_I(inode)->i_es_lock);
|
||||||
|
|
||||||
err = __es_remove_extent(inode, lblk, lblk, NULL);
|
err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
|
||||||
if (err != 0)
|
if (err1 != 0)
|
||||||
goto error;
|
goto error;
|
||||||
retry:
|
|
||||||
err = __es_insert_extent(inode, &newes);
|
err2 = __es_insert_extent(inode, &newes, es2);
|
||||||
if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
|
if (err2 != 0)
|
||||||
128, EXT4_I(inode)))
|
|
||||||
goto retry;
|
|
||||||
if (err != 0)
|
|
||||||
goto error;
|
goto error;
|
||||||
|
|
||||||
if (allocated)
|
if (allocated)
|
||||||
__insert_pending(inode, lblk);
|
__insert_pending(inode, lblk);
|
||||||
|
|
||||||
|
/* es is pre-allocated but not used, free it. */
|
||||||
|
if (es1 && !es1->es_len)
|
||||||
|
__es_free_extent(es1);
|
||||||
|
if (es2 && !es2->es_len)
|
||||||
|
__es_free_extent(es2);
|
||||||
error:
|
error:
|
||||||
write_unlock(&EXT4_I(inode)->i_es_lock);
|
write_unlock(&EXT4_I(inode)->i_es_lock);
|
||||||
|
if (err1 || err2)
|
||||||
|
goto retry;
|
||||||
|
|
||||||
ext4_es_print_tree(inode);
|
ext4_es_print_tree(inode);
|
||||||
ext4_print_pending_tree(inode);
|
ext4_print_pending_tree(inode);
|
||||||
|
return;
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -127,14 +127,14 @@ extern int __init ext4_init_es(void);
|
||||||
extern void ext4_exit_es(void);
|
extern void ext4_exit_es(void);
|
||||||
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
|
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
|
||||||
|
|
||||||
extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t len, ext4_fsblk_t pblk,
|
ext4_lblk_t len, ext4_fsblk_t pblk,
|
||||||
unsigned int status);
|
unsigned int status);
|
||||||
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
|
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t len, ext4_fsblk_t pblk,
|
ext4_lblk_t len, ext4_fsblk_t pblk,
|
||||||
unsigned int status);
|
unsigned int status);
|
||||||
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
extern void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t len);
|
ext4_lblk_t len);
|
||||||
extern void ext4_es_find_extent_range(struct inode *inode,
|
extern void ext4_es_find_extent_range(struct inode *inode,
|
||||||
int (*match_fn)(struct extent_status *es),
|
int (*match_fn)(struct extent_status *es),
|
||||||
ext4_lblk_t lblk, ext4_lblk_t end,
|
ext4_lblk_t lblk, ext4_lblk_t end,
|
||||||
|
@ -249,8 +249,8 @@ extern void ext4_exit_pending(void);
|
||||||
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
|
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
|
||||||
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
|
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
|
||||||
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
|
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
|
||||||
extern int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
|
extern void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
|
||||||
bool allocated);
|
bool allocated);
|
||||||
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
|
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t len);
|
ext4_lblk_t len);
|
||||||
extern void ext4_clear_inode_es(struct inode *inode);
|
extern void ext4_clear_inode_es(struct inode *inode);
|
||||||
|
|
|
@ -450,13 +450,14 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
|
||||||
*/
|
*/
|
||||||
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
|
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
|
||||||
bool *ilock_shared, bool *extend,
|
bool *ilock_shared, bool *extend,
|
||||||
bool *unwritten)
|
bool *unwritten, int *dio_flags)
|
||||||
{
|
{
|
||||||
struct file *file = iocb->ki_filp;
|
struct file *file = iocb->ki_filp;
|
||||||
struct inode *inode = file_inode(file);
|
struct inode *inode = file_inode(file);
|
||||||
loff_t offset;
|
loff_t offset;
|
||||||
size_t count;
|
size_t count;
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
|
bool overwrite, unaligned_io;
|
||||||
|
|
||||||
restart:
|
restart:
|
||||||
ret = ext4_generic_write_checks(iocb, from);
|
ret = ext4_generic_write_checks(iocb, from);
|
||||||
|
@ -465,16 +466,20 @@ restart:
|
||||||
|
|
||||||
offset = iocb->ki_pos;
|
offset = iocb->ki_pos;
|
||||||
count = ret;
|
count = ret;
|
||||||
if (ext4_extending_io(inode, offset, count))
|
|
||||||
*extend = true;
|
unaligned_io = ext4_unaligned_io(inode, from, offset);
|
||||||
|
*extend = ext4_extending_io(inode, offset, count);
|
||||||
|
overwrite = ext4_overwrite_io(inode, offset, count, unwritten);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Determine whether the IO operation will overwrite allocated
|
* Determine whether we need to upgrade to an exclusive lock. This is
|
||||||
* and initialized blocks.
|
* required to change security info in file_modified(), for extending
|
||||||
* We need exclusive i_rwsem for changing security info
|
* I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
|
||||||
* in file_modified().
|
* extents (as partial block zeroing may be required).
|
||||||
*/
|
*/
|
||||||
if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
|
if (*ilock_shared &&
|
||||||
!ext4_overwrite_io(inode, offset, count, unwritten))) {
|
((!IS_NOSEC(inode) || *extend || !overwrite ||
|
||||||
|
(unaligned_io && *unwritten)))) {
|
||||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||||
ret = -EAGAIN;
|
ret = -EAGAIN;
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -485,6 +490,32 @@ restart:
|
||||||
goto restart;
|
goto restart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now that locking is settled, determine dio flags and exclusivity
|
||||||
|
* requirements. Unaligned writes are allowed under shared lock so long
|
||||||
|
* as they are pure overwrites. Set the iomap overwrite only flag as an
|
||||||
|
* added precaution in this case. Even though this is unnecessary, we
|
||||||
|
* can detect and warn on unexpected -EAGAIN if an unsafe unaligned
|
||||||
|
* write is ever submitted.
|
||||||
|
*
|
||||||
|
* Otherwise, concurrent unaligned writes risk data corruption due to
|
||||||
|
* partial block zeroing in the dio layer, and so the I/O must occur
|
||||||
|
* exclusively. The inode lock is already held exclusive if the write is
|
||||||
|
* non-overwrite or extending, so drain all outstanding dio and set the
|
||||||
|
* force wait dio flag.
|
||||||
|
*/
|
||||||
|
if (*ilock_shared && unaligned_io) {
|
||||||
|
*dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
|
||||||
|
} else if (!*ilock_shared && (unaligned_io || *extend)) {
|
||||||
|
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||||
|
ret = -EAGAIN;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (unaligned_io && (!overwrite || *unwritten))
|
||||||
|
inode_dio_wait(inode);
|
||||||
|
*dio_flags = IOMAP_DIO_FORCE_WAIT;
|
||||||
|
}
|
||||||
|
|
||||||
ret = file_modified(file);
|
ret = file_modified(file);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -506,17 +537,10 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
loff_t offset = iocb->ki_pos;
|
loff_t offset = iocb->ki_pos;
|
||||||
size_t count = iov_iter_count(from);
|
size_t count = iov_iter_count(from);
|
||||||
const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
|
const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
|
||||||
bool extend = false, unaligned_io = false, unwritten = false;
|
bool extend = false, unwritten = false;
|
||||||
bool ilock_shared = true;
|
bool ilock_shared = true;
|
||||||
|
int dio_flags = 0;
|
||||||
|
|
||||||
/*
|
|
||||||
* We initially start with shared inode lock unless it is
|
|
||||||
* unaligned IO which needs exclusive lock anyways.
|
|
||||||
*/
|
|
||||||
if (ext4_unaligned_io(inode, from, offset)) {
|
|
||||||
unaligned_io = true;
|
|
||||||
ilock_shared = false;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Quick check here without any i_rwsem lock to see if it is extending
|
* Quick check here without any i_rwsem lock to see if it is extending
|
||||||
* IO. A more reliable check is done in ext4_dio_write_checks() with
|
* IO. A more reliable check is done in ext4_dio_write_checks() with
|
||||||
|
@ -549,16 +573,11 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
return ext4_buffered_write_iter(iocb, from);
|
return ext4_buffered_write_iter(iocb, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = ext4_dio_write_checks(iocb, from,
|
ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend,
|
||||||
&ilock_shared, &extend, &unwritten);
|
&unwritten, &dio_flags);
|
||||||
if (ret <= 0)
|
if (ret <= 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
/* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
|
|
||||||
if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
|
|
||||||
ret = -EAGAIN;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
/*
|
/*
|
||||||
* Make sure inline data cannot be created anymore since we are going
|
* Make sure inline data cannot be created anymore since we are going
|
||||||
* to allocate blocks for DIO. We know the inode does not have any
|
* to allocate blocks for DIO. We know the inode does not have any
|
||||||
|
@ -569,19 +588,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
offset = iocb->ki_pos;
|
offset = iocb->ki_pos;
|
||||||
count = ret;
|
count = ret;
|
||||||
|
|
||||||
/*
|
|
||||||
* Unaligned direct IO must be serialized among each other as zeroing
|
|
||||||
* of partial blocks of two competing unaligned IOs can result in data
|
|
||||||
* corruption.
|
|
||||||
*
|
|
||||||
* So we make sure we don't allow any unaligned IO in flight.
|
|
||||||
* For IOs where we need not wait (like unaligned non-AIO DIO),
|
|
||||||
* below inode_dio_wait() may anyway become a no-op, since we start
|
|
||||||
* with exclusive lock.
|
|
||||||
*/
|
|
||||||
if (unaligned_io)
|
|
||||||
inode_dio_wait(inode);
|
|
||||||
|
|
||||||
if (extend) {
|
if (extend) {
|
||||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||||
if (IS_ERR(handle)) {
|
if (IS_ERR(handle)) {
|
||||||
|
@ -601,8 +607,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
if (ilock_shared && !unwritten)
|
if (ilock_shared && !unwritten)
|
||||||
iomap_ops = &ext4_iomap_overwrite_ops;
|
iomap_ops = &ext4_iomap_overwrite_ops;
|
||||||
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
|
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
|
||||||
(unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
|
dio_flags, NULL, 0);
|
||||||
NULL, 0);
|
WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
|
||||||
if (ret == -ENOTBLK)
|
if (ret == -ENOTBLK)
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
|
|
|
@ -651,6 +651,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
|
||||||
|
|
||||||
ext4_update_inode_fsync_trans(handle, inode, 1);
|
ext4_update_inode_fsync_trans(handle, inode, 1);
|
||||||
count = ar.len;
|
count = ar.len;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update reserved blocks/metadata blocks after successful block
|
||||||
|
* allocation which had been deferred till now.
|
||||||
|
*/
|
||||||
|
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
|
||||||
|
ext4_da_update_reserve_space(inode, count, 1);
|
||||||
|
|
||||||
got_it:
|
got_it:
|
||||||
map->m_flags |= EXT4_MAP_MAPPED;
|
map->m_flags |= EXT4_MAP_MAPPED;
|
||||||
map->m_pblk = le32_to_cpu(chain[depth-1].key);
|
map->m_pblk = le32_to_cpu(chain[depth-1].key);
|
||||||
|
|
|
@ -741,9 +741,8 @@ convert:
|
||||||
}
|
}
|
||||||
|
|
||||||
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
|
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
|
||||||
unsigned copied, struct page *page)
|
unsigned copied, struct folio *folio)
|
||||||
{
|
{
|
||||||
struct folio *folio = page_folio(page);
|
|
||||||
handle_t *handle = ext4_journal_current_handle();
|
handle_t *handle = ext4_journal_current_handle();
|
||||||
int no_expand;
|
int no_expand;
|
||||||
void *kaddr;
|
void *kaddr;
|
||||||
|
@ -823,30 +822,6 @@ out:
|
||||||
return ret ? ret : copied;
|
return ret ? ret : copied;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct buffer_head *
|
|
||||||
ext4_journalled_write_inline_data(struct inode *inode,
|
|
||||||
unsigned len,
|
|
||||||
struct page *page)
|
|
||||||
{
|
|
||||||
int ret, no_expand;
|
|
||||||
void *kaddr;
|
|
||||||
struct ext4_iloc iloc;
|
|
||||||
|
|
||||||
ret = ext4_get_inode_loc(inode, &iloc);
|
|
||||||
if (ret) {
|
|
||||||
ext4_std_error(inode->i_sb, ret);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
ext4_write_lock_xattr(inode, &no_expand);
|
|
||||||
kaddr = kmap_atomic(page);
|
|
||||||
ext4_write_inline_data(inode, &iloc, kaddr, 0, len);
|
|
||||||
kunmap_atomic(kaddr);
|
|
||||||
ext4_write_unlock_xattr(inode, &no_expand);
|
|
||||||
|
|
||||||
return iloc.bh;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try to make the page cache and handle ready for the inline data case.
|
* Try to make the page cache and handle ready for the inline data case.
|
||||||
* We can call this function in 2 cases:
|
* We can call this function in 2 cases:
|
||||||
|
@ -1964,16 +1939,8 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
|
||||||
* the extent status cache must be cleared to avoid leaving
|
* the extent status cache must be cleared to avoid leaving
|
||||||
* behind stale delayed allocated extent entries
|
* behind stale delayed allocated extent entries
|
||||||
*/
|
*/
|
||||||
if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
|
if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
|
||||||
retry:
|
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
|
||||||
err = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
|
|
||||||
if (err == -ENOMEM) {
|
|
||||||
memalloc_retry_wait(GFP_ATOMIC);
|
|
||||||
goto retry;
|
|
||||||
}
|
|
||||||
if (err)
|
|
||||||
goto out_error;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Clear the content in the xattr space. */
|
/* Clear the content in the xattr space. */
|
||||||
if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) {
|
if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) {
|
||||||
|
|
114
fs/ext4/inode.c
114
fs/ext4/inode.c
|
@ -567,10 +567,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||||
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
|
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
|
||||||
map->m_lblk + map->m_len - 1))
|
map->m_lblk + map->m_len - 1))
|
||||||
status |= EXTENT_STATUS_DELAYED;
|
status |= EXTENT_STATUS_DELAYED;
|
||||||
ret = ext4_es_insert_extent(inode, map->m_lblk,
|
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
||||||
map->m_len, map->m_pblk, status);
|
map->m_pblk, status);
|
||||||
if (ret < 0)
|
|
||||||
retval = ret;
|
|
||||||
}
|
}
|
||||||
up_read((&EXT4_I(inode)->i_data_sem));
|
up_read((&EXT4_I(inode)->i_data_sem));
|
||||||
|
|
||||||
|
@ -632,16 +630,6 @@ found:
|
||||||
*/
|
*/
|
||||||
ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
|
ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Update reserved blocks/metadata blocks after successful
|
|
||||||
* block allocation which had been deferred till now. We don't
|
|
||||||
* support fallocate for non extent files. So we can update
|
|
||||||
* reserve space here.
|
|
||||||
*/
|
|
||||||
if ((retval > 0) &&
|
|
||||||
(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
|
|
||||||
ext4_da_update_reserve_space(inode, retval, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (retval > 0) {
|
if (retval > 0) {
|
||||||
|
@ -689,12 +677,8 @@ found:
|
||||||
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
|
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
|
||||||
map->m_lblk + map->m_len - 1))
|
map->m_lblk + map->m_len - 1))
|
||||||
status |= EXTENT_STATUS_DELAYED;
|
status |= EXTENT_STATUS_DELAYED;
|
||||||
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
||||||
map->m_pblk, status);
|
map->m_pblk, status);
|
||||||
if (ret < 0) {
|
|
||||||
retval = ret;
|
|
||||||
goto out_sem;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out_sem:
|
out_sem:
|
||||||
|
@ -1287,7 +1271,8 @@ static int ext4_write_end(struct file *file,
|
||||||
|
|
||||||
if (ext4_has_inline_data(inode) &&
|
if (ext4_has_inline_data(inode) &&
|
||||||
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
|
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
|
||||||
return ext4_write_inline_data_end(inode, pos, len, copied, page);
|
return ext4_write_inline_data_end(inode, pos, len, copied,
|
||||||
|
folio);
|
||||||
|
|
||||||
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
|
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
|
||||||
/*
|
/*
|
||||||
|
@ -1395,7 +1380,8 @@ static int ext4_journalled_write_end(struct file *file,
|
||||||
BUG_ON(!ext4_handle_valid(handle));
|
BUG_ON(!ext4_handle_valid(handle));
|
||||||
|
|
||||||
if (ext4_has_inline_data(inode))
|
if (ext4_has_inline_data(inode))
|
||||||
return ext4_write_inline_data_end(inode, pos, len, copied, page);
|
return ext4_write_inline_data_end(inode, pos, len, copied,
|
||||||
|
folio);
|
||||||
|
|
||||||
if (unlikely(copied < len) && !folio_test_uptodate(folio)) {
|
if (unlikely(copied < len) && !folio_test_uptodate(folio)) {
|
||||||
copied = 0;
|
copied = 0;
|
||||||
|
@ -1638,7 +1624,6 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
int ret;
|
int ret;
|
||||||
bool allocated = false;
|
bool allocated = false;
|
||||||
bool reserved = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the cluster containing lblk is shared with a delayed,
|
* If the cluster containing lblk is shared with a delayed,
|
||||||
|
@ -1654,8 +1639,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
|
||||||
if (sbi->s_cluster_ratio == 1) {
|
if (sbi->s_cluster_ratio == 1) {
|
||||||
ret = ext4_da_reserve_space(inode);
|
ret = ext4_da_reserve_space(inode);
|
||||||
if (ret != 0) /* ENOSPC */
|
if (ret != 0) /* ENOSPC */
|
||||||
goto errout;
|
return ret;
|
||||||
reserved = true;
|
|
||||||
} else { /* bigalloc */
|
} else { /* bigalloc */
|
||||||
if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
|
if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
|
||||||
if (!ext4_es_scan_clu(inode,
|
if (!ext4_es_scan_clu(inode,
|
||||||
|
@ -1663,12 +1647,11 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
|
||||||
ret = ext4_clu_mapped(inode,
|
ret = ext4_clu_mapped(inode,
|
||||||
EXT4_B2C(sbi, lblk));
|
EXT4_B2C(sbi, lblk));
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto errout;
|
return ret;
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
ret = ext4_da_reserve_space(inode);
|
ret = ext4_da_reserve_space(inode);
|
||||||
if (ret != 0) /* ENOSPC */
|
if (ret != 0) /* ENOSPC */
|
||||||
goto errout;
|
return ret;
|
||||||
reserved = true;
|
|
||||||
} else {
|
} else {
|
||||||
allocated = true;
|
allocated = true;
|
||||||
}
|
}
|
||||||
|
@ -1678,12 +1661,8 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = ext4_es_insert_delayed_block(inode, lblk, allocated);
|
ext4_es_insert_delayed_block(inode, lblk, allocated);
|
||||||
if (ret && reserved)
|
return 0;
|
||||||
ext4_da_release_space(inode, 1);
|
|
||||||
|
|
||||||
errout:
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1780,7 +1759,6 @@ add_delayed:
|
||||||
set_buffer_new(bh);
|
set_buffer_new(bh);
|
||||||
set_buffer_delay(bh);
|
set_buffer_delay(bh);
|
||||||
} else if (retval > 0) {
|
} else if (retval > 0) {
|
||||||
int ret;
|
|
||||||
unsigned int status;
|
unsigned int status;
|
||||||
|
|
||||||
if (unlikely(retval != map->m_len)) {
|
if (unlikely(retval != map->m_len)) {
|
||||||
|
@ -1793,10 +1771,8 @@ add_delayed:
|
||||||
|
|
||||||
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
|
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
|
||||||
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
|
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
|
||||||
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
||||||
map->m_pblk, status);
|
map->m_pblk, status);
|
||||||
if (ret != 0)
|
|
||||||
retval = ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out_unlock:
|
out_unlock:
|
||||||
|
@ -2321,11 +2297,11 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
|
||||||
MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
|
MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ext4_journal_page_buffers(handle_t *handle, struct page *page,
|
static int ext4_journal_folio_buffers(handle_t *handle, struct folio *folio,
|
||||||
int len)
|
size_t len)
|
||||||
{
|
{
|
||||||
struct buffer_head *page_bufs = page_buffers(page);
|
struct buffer_head *page_bufs = folio_buffers(folio);
|
||||||
struct inode *inode = page->mapping->host;
|
struct inode *inode = folio->mapping->host;
|
||||||
int ret, err;
|
int ret, err;
|
||||||
|
|
||||||
ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
|
ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
|
||||||
|
@ -2334,7 +2310,7 @@ static int ext4_journal_page_buffers(handle_t *handle, struct page *page,
|
||||||
NULL, write_end_fn);
|
NULL, write_end_fn);
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
ret = err;
|
ret = err;
|
||||||
err = ext4_jbd2_inode_add_write(handle, inode, page_offset(page), len);
|
err = ext4_jbd2_inode_add_write(handle, inode, folio_pos(folio), len);
|
||||||
if (ret == 0)
|
if (ret == 0)
|
||||||
ret = err;
|
ret = err;
|
||||||
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
|
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
|
||||||
|
@ -2344,22 +2320,20 @@ static int ext4_journal_page_buffers(handle_t *handle, struct page *page,
|
||||||
|
|
||||||
static int mpage_journal_page_buffers(handle_t *handle,
|
static int mpage_journal_page_buffers(handle_t *handle,
|
||||||
struct mpage_da_data *mpd,
|
struct mpage_da_data *mpd,
|
||||||
struct page *page)
|
struct folio *folio)
|
||||||
{
|
{
|
||||||
struct inode *inode = mpd->inode;
|
struct inode *inode = mpd->inode;
|
||||||
loff_t size = i_size_read(inode);
|
loff_t size = i_size_read(inode);
|
||||||
int len;
|
size_t len = folio_size(folio);
|
||||||
|
|
||||||
ClearPageChecked(page);
|
folio_clear_checked(folio);
|
||||||
mpd->wbc->nr_to_write--;
|
mpd->wbc->nr_to_write--;
|
||||||
|
|
||||||
if (page->index == size >> PAGE_SHIFT &&
|
if (folio_pos(folio) + len > size &&
|
||||||
!ext4_verity_in_progress(inode))
|
!ext4_verity_in_progress(inode))
|
||||||
len = size & ~PAGE_MASK;
|
len = size - folio_pos(folio);
|
||||||
else
|
|
||||||
len = PAGE_SIZE;
|
|
||||||
|
|
||||||
return ext4_journal_page_buffers(handle, page, len);
|
return ext4_journal_folio_buffers(handle, folio, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2499,7 +2473,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
|
||||||
/* Pending dirtying of journalled data? */
|
/* Pending dirtying of journalled data? */
|
||||||
if (folio_test_checked(folio)) {
|
if (folio_test_checked(folio)) {
|
||||||
err = mpage_journal_page_buffers(handle,
|
err = mpage_journal_page_buffers(handle,
|
||||||
mpd, &folio->page);
|
mpd, folio);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
goto out;
|
goto out;
|
||||||
mpd->journalled_more_data = 1;
|
mpd->journalled_more_data = 1;
|
||||||
|
@ -2944,15 +2918,15 @@ retry:
|
||||||
* Check if we should update i_disksize
|
* Check if we should update i_disksize
|
||||||
* when write to the end of file but not require block allocation
|
* when write to the end of file but not require block allocation
|
||||||
*/
|
*/
|
||||||
static int ext4_da_should_update_i_disksize(struct page *page,
|
static int ext4_da_should_update_i_disksize(struct folio *folio,
|
||||||
unsigned long offset)
|
unsigned long offset)
|
||||||
{
|
{
|
||||||
struct buffer_head *bh;
|
struct buffer_head *bh;
|
||||||
struct inode *inode = page->mapping->host;
|
struct inode *inode = folio->mapping->host;
|
||||||
unsigned int idx;
|
unsigned int idx;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
bh = page_buffers(page);
|
bh = folio_buffers(folio);
|
||||||
idx = offset >> inode->i_blkbits;
|
idx = offset >> inode->i_blkbits;
|
||||||
|
|
||||||
for (i = 0; i < idx; i++)
|
for (i = 0; i < idx; i++)
|
||||||
|
@ -2972,17 +2946,19 @@ static int ext4_da_write_end(struct file *file,
|
||||||
loff_t new_i_size;
|
loff_t new_i_size;
|
||||||
unsigned long start, end;
|
unsigned long start, end;
|
||||||
int write_mode = (int)(unsigned long)fsdata;
|
int write_mode = (int)(unsigned long)fsdata;
|
||||||
|
struct folio *folio = page_folio(page);
|
||||||
|
|
||||||
if (write_mode == FALL_BACK_TO_NONDELALLOC)
|
if (write_mode == FALL_BACK_TO_NONDELALLOC)
|
||||||
return ext4_write_end(file, mapping, pos,
|
return ext4_write_end(file, mapping, pos,
|
||||||
len, copied, page, fsdata);
|
len, copied, &folio->page, fsdata);
|
||||||
|
|
||||||
trace_ext4_da_write_end(inode, pos, len, copied);
|
trace_ext4_da_write_end(inode, pos, len, copied);
|
||||||
|
|
||||||
if (write_mode != CONVERT_INLINE_DATA &&
|
if (write_mode != CONVERT_INLINE_DATA &&
|
||||||
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
|
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
|
||||||
ext4_has_inline_data(inode))
|
ext4_has_inline_data(inode))
|
||||||
return ext4_write_inline_data_end(inode, pos, len, copied, page);
|
return ext4_write_inline_data_end(inode, pos, len, copied,
|
||||||
|
folio);
|
||||||
|
|
||||||
if (unlikely(copied < len) && !PageUptodate(page))
|
if (unlikely(copied < len) && !PageUptodate(page))
|
||||||
copied = 0;
|
copied = 0;
|
||||||
|
@ -3006,10 +2982,11 @@ static int ext4_da_write_end(struct file *file,
|
||||||
*/
|
*/
|
||||||
new_i_size = pos + copied;
|
new_i_size = pos + copied;
|
||||||
if (copied && new_i_size > inode->i_size &&
|
if (copied && new_i_size > inode->i_size &&
|
||||||
ext4_da_should_update_i_disksize(page, end))
|
ext4_da_should_update_i_disksize(folio, end))
|
||||||
ext4_update_i_disksize(inode, new_i_size);
|
ext4_update_i_disksize(inode, new_i_size);
|
||||||
|
|
||||||
return generic_write_end(file, mapping, pos, len, copied, page, fsdata);
|
return generic_write_end(file, mapping, pos, len, copied, &folio->page,
|
||||||
|
fsdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -3105,7 +3082,7 @@ static int ext4_read_folio(struct file *file, struct folio *folio)
|
||||||
int ret = -EAGAIN;
|
int ret = -EAGAIN;
|
||||||
struct inode *inode = folio->mapping->host;
|
struct inode *inode = folio->mapping->host;
|
||||||
|
|
||||||
trace_ext4_readpage(&folio->page);
|
trace_ext4_read_folio(inode, folio);
|
||||||
|
|
||||||
if (ext4_has_inline_data(inode))
|
if (ext4_has_inline_data(inode))
|
||||||
ret = ext4_readpage_inline(inode, folio);
|
ret = ext4_readpage_inline(inode, folio);
|
||||||
|
@ -3164,9 +3141,10 @@ static void ext4_journalled_invalidate_folio(struct folio *folio,
|
||||||
|
|
||||||
static bool ext4_release_folio(struct folio *folio, gfp_t wait)
|
static bool ext4_release_folio(struct folio *folio, gfp_t wait)
|
||||||
{
|
{
|
||||||
journal_t *journal = EXT4_JOURNAL(folio->mapping->host);
|
struct inode *inode = folio->mapping->host;
|
||||||
|
journal_t *journal = EXT4_JOURNAL(inode);
|
||||||
|
|
||||||
trace_ext4_releasepage(&folio->page);
|
trace_ext4_release_folio(inode, folio);
|
||||||
|
|
||||||
/* Page has dirty journalled data -> cannot release */
|
/* Page has dirty journalled data -> cannot release */
|
||||||
if (folio_test_checked(folio))
|
if (folio_test_checked(folio))
|
||||||
|
@ -3992,12 +3970,8 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
|
||||||
down_write(&EXT4_I(inode)->i_data_sem);
|
down_write(&EXT4_I(inode)->i_data_sem);
|
||||||
ext4_discard_preallocations(inode, 0);
|
ext4_discard_preallocations(inode, 0);
|
||||||
|
|
||||||
ret = ext4_es_remove_extent(inode, first_block,
|
ext4_es_remove_extent(inode, first_block,
|
||||||
stop_block - first_block);
|
stop_block - first_block);
|
||||||
if (ret) {
|
|
||||||
up_write(&EXT4_I(inode)->i_data_sem);
|
|
||||||
goto out_stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
||||||
ret = ext4_ext_remove_space(inode, first_block,
|
ret = ext4_ext_remove_space(inode, first_block,
|
||||||
|
@ -6156,7 +6130,7 @@ retry_alloc:
|
||||||
err = __block_write_begin(&folio->page, 0, len, ext4_get_block);
|
err = __block_write_begin(&folio->page, 0, len, ext4_get_block);
|
||||||
if (!err) {
|
if (!err) {
|
||||||
ret = VM_FAULT_SIGBUS;
|
ret = VM_FAULT_SIGBUS;
|
||||||
if (ext4_journal_page_buffers(handle, &folio->page, len))
|
if (ext4_journal_folio_buffers(handle, folio, len))
|
||||||
goto out_error;
|
goto out_error;
|
||||||
} else {
|
} else {
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
|
|
|
@ -796,6 +796,7 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
|
||||||
int ext4_force_shutdown(struct super_block *sb, u32 flags)
|
int ext4_force_shutdown(struct super_block *sb, u32 flags)
|
||||||
{
|
{
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||||
|
int ret;
|
||||||
|
|
||||||
if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
|
if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -808,7 +809,9 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags)
|
||||||
|
|
||||||
switch (flags) {
|
switch (flags) {
|
||||||
case EXT4_GOING_FLAGS_DEFAULT:
|
case EXT4_GOING_FLAGS_DEFAULT:
|
||||||
freeze_bdev(sb->s_bdev);
|
ret = freeze_bdev(sb->s_bdev);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
|
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
|
||||||
thaw_bdev(sb->s_bdev);
|
thaw_bdev(sb->s_bdev);
|
||||||
break;
|
break;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -49,7 +49,7 @@
|
||||||
#define MB_DEFAULT_MIN_TO_SCAN 10
|
#define MB_DEFAULT_MIN_TO_SCAN 10
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* with 'ext4_mb_stats' allocator will collect stats that will be
|
* with 's_mb_stats' allocator will collect stats that will be
|
||||||
* shown at umount. The collecting costs though!
|
* shown at umount. The collecting costs though!
|
||||||
*/
|
*/
|
||||||
#define MB_DEFAULT_STATS 0
|
#define MB_DEFAULT_STATS 0
|
||||||
|
@ -85,6 +85,13 @@
|
||||||
*/
|
*/
|
||||||
#define MB_DEFAULT_LINEAR_SCAN_THRESHOLD 16
|
#define MB_DEFAULT_LINEAR_SCAN_THRESHOLD 16
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The maximum order up to which CR_BEST_AVAIL_LEN can trim a particular
|
||||||
|
* allocation request. For example, if we have an order 7 request and max trim order
|
||||||
|
* of 3, we can trim this request up to order 4.
|
||||||
|
*/
|
||||||
|
#define MB_DEFAULT_BEST_AVAIL_TRIM_ORDER 3
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Number of valid buddy orders
|
* Number of valid buddy orders
|
||||||
*/
|
*/
|
||||||
|
@ -179,11 +186,18 @@ struct ext4_allocation_context {
|
||||||
/* copy of the best found extent taken before preallocation efforts */
|
/* copy of the best found extent taken before preallocation efforts */
|
||||||
struct ext4_free_extent ac_f_ex;
|
struct ext4_free_extent ac_f_ex;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* goal len can change in CR1.5, so save the original len. This is
|
||||||
|
* used while adjusting the PA window and for accounting.
|
||||||
|
*/
|
||||||
|
ext4_grpblk_t ac_orig_goal_len;
|
||||||
|
|
||||||
__u32 ac_groups_considered;
|
__u32 ac_groups_considered;
|
||||||
__u32 ac_flags; /* allocation hints */
|
__u32 ac_flags; /* allocation hints */
|
||||||
__u16 ac_groups_scanned;
|
__u16 ac_groups_scanned;
|
||||||
__u16 ac_groups_linear_remaining;
|
__u16 ac_groups_linear_remaining;
|
||||||
__u16 ac_found;
|
__u16 ac_found;
|
||||||
|
__u16 ac_cX_found[EXT4_MB_NUM_CRS];
|
||||||
__u16 ac_tail;
|
__u16 ac_tail;
|
||||||
__u16 ac_buddy;
|
__u16 ac_buddy;
|
||||||
__u8 ac_status;
|
__u8 ac_status;
|
||||||
|
|
|
@ -334,7 +334,7 @@ int ext4_mpage_readpages(struct inode *inode,
|
||||||
folio_size(folio));
|
folio_size(folio));
|
||||||
if (first_hole == 0) {
|
if (first_hole == 0) {
|
||||||
if (ext4_need_verity(inode, folio->index) &&
|
if (ext4_need_verity(inode, folio->index) &&
|
||||||
!fsverity_verify_page(&folio->page))
|
!fsverity_verify_folio(folio))
|
||||||
goto set_error_page;
|
goto set_error_page;
|
||||||
folio_mark_uptodate(folio);
|
folio_mark_uptodate(folio);
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
|
|
|
@ -1133,6 +1133,12 @@ static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
|
||||||
struct block_device *bdev;
|
struct block_device *bdev;
|
||||||
bdev = sbi->s_journal_bdev;
|
bdev = sbi->s_journal_bdev;
|
||||||
if (bdev) {
|
if (bdev) {
|
||||||
|
/*
|
||||||
|
* Invalidate the journal device's buffers. We don't want them
|
||||||
|
* floating about in memory - the physical journal device may
|
||||||
|
* be hotswapped, and it breaks the `ro-after' testing code.
|
||||||
|
*/
|
||||||
|
invalidate_bdev(bdev);
|
||||||
blkdev_put(bdev, sbi->s_sb);
|
blkdev_put(bdev, sbi->s_sb);
|
||||||
sbi->s_journal_bdev = NULL;
|
sbi->s_journal_bdev = NULL;
|
||||||
}
|
}
|
||||||
|
@ -1164,12 +1170,12 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
|
||||||
#ifdef CONFIG_QUOTA
|
#ifdef CONFIG_QUOTA
|
||||||
static int ext4_quota_off(struct super_block *sb, int type);
|
static int ext4_quota_off(struct super_block *sb, int type);
|
||||||
|
|
||||||
static inline void ext4_quota_off_umount(struct super_block *sb)
|
static inline void ext4_quotas_off(struct super_block *sb, int type)
|
||||||
{
|
{
|
||||||
int type;
|
BUG_ON(type > EXT4_MAXQUOTAS);
|
||||||
|
|
||||||
/* Use our quota_off function to clear inode flags etc. */
|
/* Use our quota_off function to clear inode flags etc. */
|
||||||
for (type = 0; type < EXT4_MAXQUOTAS; type++)
|
for (type--; type >= 0; type--)
|
||||||
ext4_quota_off(sb, type);
|
ext4_quota_off(sb, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1185,7 +1191,7 @@ static inline char *get_qf_name(struct super_block *sb,
|
||||||
lockdep_is_held(&sb->s_umount));
|
lockdep_is_held(&sb->s_umount));
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static inline void ext4_quota_off_umount(struct super_block *sb)
|
static inline void ext4_quotas_off(struct super_block *sb, int type)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1285,7 +1291,7 @@ static void ext4_put_super(struct super_block *sb)
|
||||||
&sb->s_uuid);
|
&sb->s_uuid);
|
||||||
|
|
||||||
ext4_unregister_li_request(sb);
|
ext4_unregister_li_request(sb);
|
||||||
ext4_quota_off_umount(sb);
|
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
|
||||||
|
|
||||||
flush_work(&sbi->s_error_work);
|
flush_work(&sbi->s_error_work);
|
||||||
destroy_workqueue(sbi->rsv_conversion_wq);
|
destroy_workqueue(sbi->rsv_conversion_wq);
|
||||||
|
@ -1332,14 +1338,8 @@ static void ext4_put_super(struct super_block *sb)
|
||||||
|
|
||||||
sync_blockdev(sb->s_bdev);
|
sync_blockdev(sb->s_bdev);
|
||||||
invalidate_bdev(sb->s_bdev);
|
invalidate_bdev(sb->s_bdev);
|
||||||
if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
|
if (sbi->s_journal_bdev) {
|
||||||
/*
|
|
||||||
* Invalidate the journal device's buffers. We don't want them
|
|
||||||
* floating about in memory - the physical journal device may
|
|
||||||
* hotswapped, and it breaks the `ro-after' testing code.
|
|
||||||
*/
|
|
||||||
sync_blockdev(sbi->s_journal_bdev);
|
sync_blockdev(sbi->s_journal_bdev);
|
||||||
invalidate_bdev(sbi->s_journal_bdev);
|
|
||||||
ext4_blkdev_remove(sbi);
|
ext4_blkdev_remove(sbi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3703,16 +3703,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
|
||||||
ext4_group_t group = elr->lr_next_group;
|
ext4_group_t group = elr->lr_next_group;
|
||||||
unsigned int prefetch_ios = 0;
|
unsigned int prefetch_ios = 0;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
int nr = EXT4_SB(sb)->s_mb_prefetch;
|
||||||
u64 start_time;
|
u64 start_time;
|
||||||
|
|
||||||
if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
|
if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
|
||||||
elr->lr_next_group = ext4_mb_prefetch(sb, group,
|
elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
|
||||||
EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
|
ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
|
||||||
if (prefetch_ios)
|
trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
|
||||||
ext4_mb_prefetch_fini(sb, elr->lr_next_group,
|
|
||||||
prefetch_ios);
|
|
||||||
trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
|
|
||||||
prefetch_ios);
|
|
||||||
if (group >= elr->lr_next_group) {
|
if (group >= elr->lr_next_group) {
|
||||||
ret = 1;
|
ret = 1;
|
||||||
if (elr->lr_first_not_zeroed != ngroups &&
|
if (elr->lr_first_not_zeroed != ngroups &&
|
||||||
|
@ -5308,6 +5305,19 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
|
||||||
goto failed_mount3;
|
goto failed_mount3;
|
||||||
|
|
||||||
sbi->s_stripe = ext4_get_stripe_size(sbi);
|
sbi->s_stripe = ext4_get_stripe_size(sbi);
|
||||||
|
/*
|
||||||
|
* It's hard to get stripe aligned blocks if stripe is not aligned with
|
||||||
|
* cluster, so just disable stripe and alert user to simplify code and avoid
|
||||||
|
* stripe aligned allocation, which will rarely succeed.
|
||||||
|
*/
|
||||||
|
if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 &&
|
||||||
|
sbi->s_stripe % sbi->s_cluster_ratio != 0) {
|
||||||
|
ext4_msg(sb, KERN_WARNING,
|
||||||
|
"stripe (%lu) is not aligned with cluster size (%u), "
|
||||||
|
"stripe is disabled",
|
||||||
|
sbi->s_stripe, sbi->s_cluster_ratio);
|
||||||
|
sbi->s_stripe = 0;
|
||||||
|
}
|
||||||
sbi->s_extent_max_zeroout_kb = 32;
|
sbi->s_extent_max_zeroout_kb = 32;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -5578,7 +5588,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
|
||||||
ext4_msg(sb, KERN_INFO, "recovery complete");
|
ext4_msg(sb, KERN_INFO, "recovery complete");
|
||||||
err = ext4_mark_recovery_complete(sb, es);
|
err = ext4_mark_recovery_complete(sb, es);
|
||||||
if (err)
|
if (err)
|
||||||
goto failed_mount9;
|
goto failed_mount10;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
|
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
|
||||||
|
@ -5597,7 +5607,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
failed_mount9:
|
failed_mount10:
|
||||||
|
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
|
||||||
|
failed_mount9: __maybe_unused
|
||||||
ext4_release_orphan_info(sb);
|
ext4_release_orphan_info(sb);
|
||||||
failed_mount8:
|
failed_mount8:
|
||||||
ext4_unregister_sysfs(sb);
|
ext4_unregister_sysfs(sb);
|
||||||
|
@ -5656,6 +5668,7 @@ failed_mount:
|
||||||
brelse(sbi->s_sbh);
|
brelse(sbi->s_sbh);
|
||||||
ext4_blkdev_remove(sbi);
|
ext4_blkdev_remove(sbi);
|
||||||
out_fail:
|
out_fail:
|
||||||
|
invalidate_bdev(sb->s_bdev);
|
||||||
sb->s_fs_info = NULL;
|
sb->s_fs_info = NULL;
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -5738,6 +5751,11 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
|
||||||
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
|
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
|
||||||
else
|
else
|
||||||
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
|
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
|
||||||
|
/*
|
||||||
|
* Always enable journal cycle record option, letting the journal
|
||||||
|
* record log transactions continuously between each mount.
|
||||||
|
*/
|
||||||
|
journal->j_flags |= JBD2_CYCLE_RECORD;
|
||||||
write_unlock(&journal->j_state_lock);
|
write_unlock(&journal->j_state_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5990,19 +6008,27 @@ static int ext4_load_journal(struct super_block *sb,
|
||||||
err = jbd2_journal_wipe(journal, !really_read_only);
|
err = jbd2_journal_wipe(journal, !really_read_only);
|
||||||
if (!err) {
|
if (!err) {
|
||||||
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
|
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
|
||||||
|
__le16 orig_state;
|
||||||
|
bool changed = false;
|
||||||
|
|
||||||
if (save)
|
if (save)
|
||||||
memcpy(save, ((char *) es) +
|
memcpy(save, ((char *) es) +
|
||||||
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
|
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
|
||||||
err = jbd2_journal_load(journal);
|
err = jbd2_journal_load(journal);
|
||||||
if (save)
|
if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
|
||||||
|
save, EXT4_S_ERR_LEN)) {
|
||||||
memcpy(((char *) es) + EXT4_S_ERR_START,
|
memcpy(((char *) es) + EXT4_S_ERR_START,
|
||||||
save, EXT4_S_ERR_LEN);
|
save, EXT4_S_ERR_LEN);
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
kfree(save);
|
kfree(save);
|
||||||
|
orig_state = es->s_state;
|
||||||
es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
|
es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
|
||||||
EXT4_ERROR_FS);
|
EXT4_ERROR_FS);
|
||||||
|
if (orig_state != es->s_state)
|
||||||
|
changed = true;
|
||||||
/* Write out restored error information to the superblock */
|
/* Write out restored error information to the superblock */
|
||||||
if (!bdev_read_only(sb->s_bdev)) {
|
if (changed && !really_read_only) {
|
||||||
int err2;
|
int err2;
|
||||||
err2 = ext4_commit_super(sb);
|
err2 = ext4_commit_super(sb);
|
||||||
err = err ? : err2;
|
err = err ? : err2;
|
||||||
|
@ -7037,20 +7063,8 @@ int ext4_enable_quotas(struct super_block *sb)
|
||||||
"(type=%d, err=%d, ino=%lu). "
|
"(type=%d, err=%d, ino=%lu). "
|
||||||
"Please run e2fsck to fix.", type,
|
"Please run e2fsck to fix.", type,
|
||||||
err, qf_inums[type]);
|
err, qf_inums[type]);
|
||||||
for (type--; type >= 0; type--) {
|
|
||||||
struct inode *inode;
|
|
||||||
|
|
||||||
inode = sb_dqopt(sb)->files[type];
|
|
||||||
if (inode)
|
|
||||||
inode = igrab(inode);
|
|
||||||
dquot_quota_off(sb, type);
|
|
||||||
if (inode) {
|
|
||||||
lockdep_set_quota_inode(inode,
|
|
||||||
I_DATA_SEM_NORMAL);
|
|
||||||
iput(inode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
ext4_quotas_off(sb, type);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -223,6 +223,7 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int
|
||||||
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
|
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
|
||||||
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
|
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
|
||||||
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
|
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
|
||||||
|
EXT4_RW_ATTR_SBI_UI(mb_best_avail_max_trim_order, s_mb_best_avail_max_trim_order);
|
||||||
#ifdef CONFIG_EXT4_DEBUG
|
#ifdef CONFIG_EXT4_DEBUG
|
||||||
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
|
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
|
||||||
#endif
|
#endif
|
||||||
|
@ -273,6 +274,7 @@ static struct attribute *ext4_attrs[] = {
|
||||||
ATTR_LIST(warning_ratelimit_burst),
|
ATTR_LIST(warning_ratelimit_burst),
|
||||||
ATTR_LIST(msg_ratelimit_interval_ms),
|
ATTR_LIST(msg_ratelimit_interval_ms),
|
||||||
ATTR_LIST(msg_ratelimit_burst),
|
ATTR_LIST(msg_ratelimit_burst),
|
||||||
|
ATTR_LIST(mb_best_avail_max_trim_order),
|
||||||
ATTR_LIST(errors_count),
|
ATTR_LIST(errors_count),
|
||||||
ATTR_LIST(warning_count),
|
ATTR_LIST(warning_count),
|
||||||
ATTR_LIST(msg_count),
|
ATTR_LIST(msg_count),
|
||||||
|
|
|
@ -1557,8 +1557,21 @@ static int journal_reset(journal_t *journal)
|
||||||
journal->j_first = first;
|
journal->j_first = first;
|
||||||
journal->j_last = last;
|
journal->j_last = last;
|
||||||
|
|
||||||
journal->j_head = journal->j_first;
|
if (journal->j_head != 0 && journal->j_flags & JBD2_CYCLE_RECORD) {
|
||||||
journal->j_tail = journal->j_first;
|
/*
|
||||||
|
* Disable the cycled recording mode if the journal head block
|
||||||
|
* number is not correct.
|
||||||
|
*/
|
||||||
|
if (journal->j_head < first || journal->j_head >= last) {
|
||||||
|
printk(KERN_WARNING "JBD2: Incorrect Journal head block %lu, "
|
||||||
|
"disable journal_cycle_record\n",
|
||||||
|
journal->j_head);
|
||||||
|
journal->j_head = journal->j_first;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
journal->j_head = journal->j_first;
|
||||||
|
}
|
||||||
|
journal->j_tail = journal->j_head;
|
||||||
journal->j_free = journal->j_last - journal->j_first;
|
journal->j_free = journal->j_last - journal->j_first;
|
||||||
|
|
||||||
journal->j_tail_sequence = journal->j_transaction_sequence;
|
journal->j_tail_sequence = journal->j_transaction_sequence;
|
||||||
|
@ -1730,6 +1743,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
|
||||||
|
|
||||||
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
|
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
|
||||||
sb->s_start = cpu_to_be32(0);
|
sb->s_start = cpu_to_be32(0);
|
||||||
|
sb->s_head = cpu_to_be32(journal->j_head);
|
||||||
if (jbd2_has_feature_fast_commit(journal)) {
|
if (jbd2_has_feature_fast_commit(journal)) {
|
||||||
/*
|
/*
|
||||||
* When journal is clean, no need to commit fast commit flag and
|
* When journal is clean, no need to commit fast commit flag and
|
||||||
|
@ -1903,6 +1917,9 @@ static int journal_get_superblock(journal_t *journal)
|
||||||
bh = journal->j_sb_buffer;
|
bh = journal->j_sb_buffer;
|
||||||
|
|
||||||
J_ASSERT(bh != NULL);
|
J_ASSERT(bh != NULL);
|
||||||
|
if (buffer_verified(bh))
|
||||||
|
return 0;
|
||||||
|
|
||||||
err = bh_read(bh, 0);
|
err = bh_read(bh, 0);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
printk(KERN_ERR
|
printk(KERN_ERR
|
||||||
|
@ -1910,9 +1927,6 @@ static int journal_get_superblock(journal_t *journal)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buffer_verified(bh))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
sb = journal->j_superblock;
|
sb = journal->j_superblock;
|
||||||
|
|
||||||
err = -EINVAL;
|
err = -EINVAL;
|
||||||
|
@ -1923,21 +1937,13 @@ static int journal_get_superblock(journal_t *journal)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch(be32_to_cpu(sb->s_header.h_blocktype)) {
|
if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
|
||||||
case JBD2_SUPERBLOCK_V1:
|
be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
|
||||||
journal->j_format_version = 1;
|
|
||||||
break;
|
|
||||||
case JBD2_SUPERBLOCK_V2:
|
|
||||||
journal->j_format_version = 2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
|
printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
|
if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
|
||||||
journal->j_total_len = be32_to_cpu(sb->s_maxlen);
|
|
||||||
else if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
|
|
||||||
printk(KERN_WARNING "JBD2: journal file too short\n");
|
printk(KERN_WARNING "JBD2: journal file too short\n");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -1980,25 +1986,14 @@ static int journal_get_superblock(journal_t *journal)
|
||||||
journal->j_chksum_driver = NULL;
|
journal->j_chksum_driver = NULL;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (jbd2_journal_has_csum_v2or3(journal)) {
|
|
||||||
/* Check superblock checksum */
|
/* Check superblock checksum */
|
||||||
if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
|
if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
|
||||||
printk(KERN_ERR "JBD2: journal checksum error\n");
|
printk(KERN_ERR "JBD2: journal checksum error\n");
|
||||||
err = -EFSBADCRC;
|
err = -EFSBADCRC;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Precompute checksum seed for all metadata */
|
|
||||||
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
|
|
||||||
sizeof(sb->s_uuid));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
journal->j_revoke_records_per_block =
|
|
||||||
journal_revoke_records_per_block(journal);
|
|
||||||
set_buffer_verified(bh);
|
set_buffer_verified(bh);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
@ -2029,6 +2024,15 @@ static int load_superblock(journal_t *journal)
|
||||||
journal->j_errno = be32_to_cpu(sb->s_errno);
|
journal->j_errno = be32_to_cpu(sb->s_errno);
|
||||||
journal->j_last = be32_to_cpu(sb->s_maxlen);
|
journal->j_last = be32_to_cpu(sb->s_maxlen);
|
||||||
|
|
||||||
|
if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
|
||||||
|
journal->j_total_len = be32_to_cpu(sb->s_maxlen);
|
||||||
|
/* Precompute checksum seed for all metadata */
|
||||||
|
if (jbd2_journal_has_csum_v2or3(journal))
|
||||||
|
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
|
||||||
|
sizeof(sb->s_uuid));
|
||||||
|
journal->j_revoke_records_per_block =
|
||||||
|
journal_revoke_records_per_block(journal);
|
||||||
|
|
||||||
if (jbd2_has_feature_fast_commit(journal)) {
|
if (jbd2_has_feature_fast_commit(journal)) {
|
||||||
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
|
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
|
||||||
num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
|
num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
|
||||||
|
@ -2060,10 +2064,12 @@ int jbd2_journal_load(journal_t *journal)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
sb = journal->j_superblock;
|
sb = journal->j_superblock;
|
||||||
/* If this is a V2 superblock, then we have to check the
|
|
||||||
* features flags on it. */
|
|
||||||
|
|
||||||
if (journal->j_format_version >= 2) {
|
/*
|
||||||
|
* If this is a V2 superblock, then we have to check the
|
||||||
|
* features flags on it.
|
||||||
|
*/
|
||||||
|
if (jbd2_format_support_feature(journal)) {
|
||||||
if ((sb->s_feature_ro_compat &
|
if ((sb->s_feature_ro_compat &
|
||||||
~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
|
~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
|
||||||
(sb->s_feature_incompat &
|
(sb->s_feature_incompat &
|
||||||
|
@ -2221,11 +2227,9 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
|
||||||
|
|
||||||
if (!compat && !ro && !incompat)
|
if (!compat && !ro && !incompat)
|
||||||
return 1;
|
return 1;
|
||||||
/* Load journal superblock if it is not loaded yet. */
|
if (journal_get_superblock(journal))
|
||||||
if (journal->j_format_version == 0 &&
|
|
||||||
journal_get_superblock(journal) != 0)
|
|
||||||
return 0;
|
return 0;
|
||||||
if (journal->j_format_version == 1)
|
if (!jbd2_format_support_feature(journal))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
sb = journal->j_superblock;
|
sb = journal->j_superblock;
|
||||||
|
@ -2255,11 +2259,7 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp
|
||||||
if (!compat && !ro && !incompat)
|
if (!compat && !ro && !incompat)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/* We can support any known requested features iff the
|
if (!jbd2_format_support_feature(journal))
|
||||||
* superblock is in version 2. Otherwise we fail to support any
|
|
||||||
* extended sb features. */
|
|
||||||
|
|
||||||
if (journal->j_format_version != 2)
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
|
if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
|
||||||
|
|
|
@ -29,6 +29,7 @@ struct recovery_info
|
||||||
{
|
{
|
||||||
tid_t start_transaction;
|
tid_t start_transaction;
|
||||||
tid_t end_transaction;
|
tid_t end_transaction;
|
||||||
|
unsigned long head_block;
|
||||||
|
|
||||||
int nr_replays;
|
int nr_replays;
|
||||||
int nr_revokes;
|
int nr_revokes;
|
||||||
|
@ -301,11 +302,11 @@ int jbd2_journal_recover(journal_t *journal)
|
||||||
* is always zero if, and only if, the journal was cleanly
|
* is always zero if, and only if, the journal was cleanly
|
||||||
* unmounted.
|
* unmounted.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (!sb->s_start) {
|
if (!sb->s_start) {
|
||||||
jbd2_debug(1, "No recovery required, last transaction %d\n",
|
jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
|
||||||
be32_to_cpu(sb->s_sequence));
|
be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
|
||||||
journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
|
journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
|
||||||
|
journal->j_head = be32_to_cpu(sb->s_head);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -324,6 +325,9 @@ int jbd2_journal_recover(journal_t *journal)
|
||||||
/* Restart the log at the next transaction ID, thus invalidating
|
/* Restart the log at the next transaction ID, thus invalidating
|
||||||
* any existing commit records in the log. */
|
* any existing commit records in the log. */
|
||||||
journal->j_transaction_sequence = ++info.end_transaction;
|
journal->j_transaction_sequence = ++info.end_transaction;
|
||||||
|
journal->j_head = info.head_block;
|
||||||
|
jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n",
|
||||||
|
journal->j_transaction_sequence, journal->j_head);
|
||||||
|
|
||||||
jbd2_journal_clear_revoke(journal);
|
jbd2_journal_clear_revoke(journal);
|
||||||
err2 = sync_blockdev(journal->j_fs_dev);
|
err2 = sync_blockdev(journal->j_fs_dev);
|
||||||
|
@ -364,6 +368,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
|
||||||
if (err) {
|
if (err) {
|
||||||
printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
|
printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
|
||||||
++journal->j_transaction_sequence;
|
++journal->j_transaction_sequence;
|
||||||
|
journal->j_head = journal->j_first;
|
||||||
} else {
|
} else {
|
||||||
#ifdef CONFIG_JBD2_DEBUG
|
#ifdef CONFIG_JBD2_DEBUG
|
||||||
int dropped = info.end_transaction -
|
int dropped = info.end_transaction -
|
||||||
|
@ -373,6 +378,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
|
||||||
dropped, (dropped == 1) ? "" : "s");
|
dropped, (dropped == 1) ? "" : "s");
|
||||||
#endif
|
#endif
|
||||||
journal->j_transaction_sequence = ++info.end_transaction;
|
journal->j_transaction_sequence = ++info.end_transaction;
|
||||||
|
journal->j_head = info.head_block;
|
||||||
}
|
}
|
||||||
|
|
||||||
journal->j_tail = 0;
|
journal->j_tail = 0;
|
||||||
|
@ -462,7 +468,7 @@ static int do_one_pass(journal_t *journal,
|
||||||
struct recovery_info *info, enum passtype pass)
|
struct recovery_info *info, enum passtype pass)
|
||||||
{
|
{
|
||||||
unsigned int first_commit_ID, next_commit_ID;
|
unsigned int first_commit_ID, next_commit_ID;
|
||||||
unsigned long next_log_block;
|
unsigned long next_log_block, head_block;
|
||||||
int err, success = 0;
|
int err, success = 0;
|
||||||
journal_superblock_t * sb;
|
journal_superblock_t * sb;
|
||||||
journal_header_t * tmp;
|
journal_header_t * tmp;
|
||||||
|
@ -485,6 +491,7 @@ static int do_one_pass(journal_t *journal,
|
||||||
sb = journal->j_superblock;
|
sb = journal->j_superblock;
|
||||||
next_commit_ID = be32_to_cpu(sb->s_sequence);
|
next_commit_ID = be32_to_cpu(sb->s_sequence);
|
||||||
next_log_block = be32_to_cpu(sb->s_start);
|
next_log_block = be32_to_cpu(sb->s_start);
|
||||||
|
head_block = next_log_block;
|
||||||
|
|
||||||
first_commit_ID = next_commit_ID;
|
first_commit_ID = next_commit_ID;
|
||||||
if (pass == PASS_SCAN)
|
if (pass == PASS_SCAN)
|
||||||
|
@ -809,6 +816,7 @@ static int do_one_pass(journal_t *journal,
|
||||||
if (commit_time < last_trans_commit_time)
|
if (commit_time < last_trans_commit_time)
|
||||||
goto ignore_crc_mismatch;
|
goto ignore_crc_mismatch;
|
||||||
info->end_transaction = next_commit_ID;
|
info->end_transaction = next_commit_ID;
|
||||||
|
info->head_block = head_block;
|
||||||
|
|
||||||
if (!jbd2_has_feature_async_commit(journal)) {
|
if (!jbd2_has_feature_async_commit(journal)) {
|
||||||
journal->j_failed_commit =
|
journal->j_failed_commit =
|
||||||
|
@ -817,8 +825,10 @@ static int do_one_pass(journal_t *journal,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pass == PASS_SCAN)
|
if (pass == PASS_SCAN) {
|
||||||
last_trans_commit_time = commit_time;
|
last_trans_commit_time = commit_time;
|
||||||
|
head_block = next_log_block;
|
||||||
|
}
|
||||||
brelse(bh);
|
brelse(bh);
|
||||||
next_commit_ID++;
|
next_commit_ID++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -868,6 +878,8 @@ static int do_one_pass(journal_t *journal,
|
||||||
if (pass == PASS_SCAN) {
|
if (pass == PASS_SCAN) {
|
||||||
if (!info->end_transaction)
|
if (!info->end_transaction)
|
||||||
info->end_transaction = next_commit_ID;
|
info->end_transaction = next_commit_ID;
|
||||||
|
if (!info->head_block)
|
||||||
|
info->head_block = head_block;
|
||||||
} else {
|
} else {
|
||||||
/* It's really bad news if different passes end up at
|
/* It's really bad news if different passes end up at
|
||||||
* different places (but possible due to IO errors). */
|
* different places (but possible due to IO errors). */
|
||||||
|
|
|
@ -265,8 +265,10 @@ typedef struct journal_superblock_s
|
||||||
__u8 s_padding2[3];
|
__u8 s_padding2[3];
|
||||||
/* 0x0054 */
|
/* 0x0054 */
|
||||||
__be32 s_num_fc_blks; /* Number of fast commit blocks */
|
__be32 s_num_fc_blks; /* Number of fast commit blocks */
|
||||||
/* 0x0058 */
|
__be32 s_head; /* blocknr of head of log, only uptodate
|
||||||
__u32 s_padding[41];
|
* while the filesystem is clean */
|
||||||
|
/* 0x005C */
|
||||||
|
__u32 s_padding[40];
|
||||||
__be32 s_checksum; /* crc32c(superblock) */
|
__be32 s_checksum; /* crc32c(superblock) */
|
||||||
|
|
||||||
/* 0x0100 */
|
/* 0x0100 */
|
||||||
|
@ -274,17 +276,6 @@ typedef struct journal_superblock_s
|
||||||
/* 0x0400 */
|
/* 0x0400 */
|
||||||
} journal_superblock_t;
|
} journal_superblock_t;
|
||||||
|
|
||||||
/* Use the jbd2_{has,set,clear}_feature_* helpers; these will be removed */
|
|
||||||
#define JBD2_HAS_COMPAT_FEATURE(j,mask) \
|
|
||||||
((j)->j_format_version >= 2 && \
|
|
||||||
((j)->j_superblock->s_feature_compat & cpu_to_be32((mask))))
|
|
||||||
#define JBD2_HAS_RO_COMPAT_FEATURE(j,mask) \
|
|
||||||
((j)->j_format_version >= 2 && \
|
|
||||||
((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask))))
|
|
||||||
#define JBD2_HAS_INCOMPAT_FEATURE(j,mask) \
|
|
||||||
((j)->j_format_version >= 2 && \
|
|
||||||
((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
|
|
||||||
|
|
||||||
#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001
|
#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001
|
||||||
|
|
||||||
#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
|
#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
|
||||||
|
@ -803,11 +794,6 @@ struct journal_s
|
||||||
*/
|
*/
|
||||||
journal_superblock_t *j_superblock;
|
journal_superblock_t *j_superblock;
|
||||||
|
|
||||||
/**
|
|
||||||
* @j_format_version: Version of the superblock format.
|
|
||||||
*/
|
|
||||||
int j_format_version;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @j_state_lock: Protect the various scalars in the journal.
|
* @j_state_lock: Protect the various scalars in the journal.
|
||||||
*/
|
*/
|
||||||
|
@ -1324,11 +1310,22 @@ struct journal_s
|
||||||
rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \
|
rwsem_release(&j->j_trans_commit_map, _THIS_IP_); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We can support any known requested features iff the
|
||||||
|
* superblock is not in version 1. Otherwise we fail to support any
|
||||||
|
* extended sb features.
|
||||||
|
*/
|
||||||
|
static inline bool jbd2_format_support_feature(journal_t *j)
|
||||||
|
{
|
||||||
|
return j->j_superblock->s_header.h_blocktype !=
|
||||||
|
cpu_to_be32(JBD2_SUPERBLOCK_V1);
|
||||||
|
}
|
||||||
|
|
||||||
/* journal feature predicate functions */
|
/* journal feature predicate functions */
|
||||||
#define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \
|
#define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \
|
||||||
static inline bool jbd2_has_feature_##name(journal_t *j) \
|
static inline bool jbd2_has_feature_##name(journal_t *j) \
|
||||||
{ \
|
{ \
|
||||||
return ((j)->j_format_version >= 2 && \
|
return (jbd2_format_support_feature(j) && \
|
||||||
((j)->j_superblock->s_feature_compat & \
|
((j)->j_superblock->s_feature_compat & \
|
||||||
cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \
|
cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \
|
||||||
} \
|
} \
|
||||||
|
@ -1346,7 +1343,7 @@ static inline void jbd2_clear_feature_##name(journal_t *j) \
|
||||||
#define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \
|
#define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \
|
||||||
static inline bool jbd2_has_feature_##name(journal_t *j) \
|
static inline bool jbd2_has_feature_##name(journal_t *j) \
|
||||||
{ \
|
{ \
|
||||||
return ((j)->j_format_version >= 2 && \
|
return (jbd2_format_support_feature(j) && \
|
||||||
((j)->j_superblock->s_feature_ro_compat & \
|
((j)->j_superblock->s_feature_ro_compat & \
|
||||||
cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \
|
cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \
|
||||||
} \
|
} \
|
||||||
|
@ -1364,7 +1361,7 @@ static inline void jbd2_clear_feature_##name(journal_t *j) \
|
||||||
#define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \
|
#define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \
|
||||||
static inline bool jbd2_has_feature_##name(journal_t *j) \
|
static inline bool jbd2_has_feature_##name(journal_t *j) \
|
||||||
{ \
|
{ \
|
||||||
return ((j)->j_format_version >= 2 && \
|
return (jbd2_format_support_feature(j) && \
|
||||||
((j)->j_superblock->s_feature_incompat & \
|
((j)->j_superblock->s_feature_incompat & \
|
||||||
cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \
|
cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \
|
||||||
} \
|
} \
|
||||||
|
@ -1400,6 +1397,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
|
||||||
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
|
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
|
||||||
* data write error in ordered
|
* data write error in ordered
|
||||||
* mode */
|
* mode */
|
||||||
|
#define JBD2_CYCLE_RECORD 0x080 /* Journal cycled record log on
|
||||||
|
* clean and empty filesystem
|
||||||
|
* logging area */
|
||||||
#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
|
#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
|
||||||
#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
|
#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
|
||||||
#define JBD2_JOURNAL_FLUSH_DISCARD 0x0001
|
#define JBD2_JOURNAL_FLUSH_DISCARD 0x0001
|
||||||
|
|
|
@ -120,6 +120,20 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
|
||||||
{ EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \
|
{ EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \
|
||||||
{ EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"})
|
{ EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"})
|
||||||
|
|
||||||
|
TRACE_DEFINE_ENUM(CR_POWER2_ALIGNED);
|
||||||
|
TRACE_DEFINE_ENUM(CR_GOAL_LEN_FAST);
|
||||||
|
TRACE_DEFINE_ENUM(CR_BEST_AVAIL_LEN);
|
||||||
|
TRACE_DEFINE_ENUM(CR_GOAL_LEN_SLOW);
|
||||||
|
TRACE_DEFINE_ENUM(CR_ANY_FREE);
|
||||||
|
|
||||||
|
#define show_criteria(cr) \
|
||||||
|
__print_symbolic(cr, \
|
||||||
|
{ CR_POWER2_ALIGNED, "CR_POWER2_ALIGNED" }, \
|
||||||
|
{ CR_GOAL_LEN_FAST, "CR_GOAL_LEN_FAST" }, \
|
||||||
|
{ CR_BEST_AVAIL_LEN, "CR_BEST_AVAIL_LEN" }, \
|
||||||
|
{ CR_GOAL_LEN_SLOW, "CR_GOAL_LEN_SLOW" }, \
|
||||||
|
{ CR_ANY_FREE, "CR_ANY_FREE" })
|
||||||
|
|
||||||
TRACE_EVENT(ext4_other_inode_update_time,
|
TRACE_EVENT(ext4_other_inode_update_time,
|
||||||
TP_PROTO(struct inode *inode, ino_t orig_ino),
|
TP_PROTO(struct inode *inode, ino_t orig_ino),
|
||||||
|
|
||||||
|
@ -560,10 +574,10 @@ TRACE_EVENT(ext4_writepages_result,
|
||||||
(unsigned long) __entry->writeback_index)
|
(unsigned long) __entry->writeback_index)
|
||||||
);
|
);
|
||||||
|
|
||||||
DECLARE_EVENT_CLASS(ext4__page_op,
|
DECLARE_EVENT_CLASS(ext4__folio_op,
|
||||||
TP_PROTO(struct page *page),
|
TP_PROTO(struct inode *inode, struct folio *folio),
|
||||||
|
|
||||||
TP_ARGS(page),
|
TP_ARGS(inode, folio),
|
||||||
|
|
||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__field( dev_t, dev )
|
__field( dev_t, dev )
|
||||||
|
@ -573,29 +587,29 @@ DECLARE_EVENT_CLASS(ext4__page_op,
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_fast_assign(
|
TP_fast_assign(
|
||||||
__entry->dev = page->mapping->host->i_sb->s_dev;
|
__entry->dev = inode->i_sb->s_dev;
|
||||||
__entry->ino = page->mapping->host->i_ino;
|
__entry->ino = inode->i_ino;
|
||||||
__entry->index = page->index;
|
__entry->index = folio->index;
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_printk("dev %d,%d ino %lu page_index %lu",
|
TP_printk("dev %d,%d ino %lu folio_index %lu",
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||||
(unsigned long) __entry->ino,
|
(unsigned long) __entry->ino,
|
||||||
(unsigned long) __entry->index)
|
(unsigned long) __entry->index)
|
||||||
);
|
);
|
||||||
|
|
||||||
DEFINE_EVENT(ext4__page_op, ext4_readpage,
|
DEFINE_EVENT(ext4__folio_op, ext4_read_folio,
|
||||||
|
|
||||||
TP_PROTO(struct page *page),
|
TP_PROTO(struct inode *inode, struct folio *folio),
|
||||||
|
|
||||||
TP_ARGS(page)
|
TP_ARGS(inode, folio)
|
||||||
);
|
);
|
||||||
|
|
||||||
DEFINE_EVENT(ext4__page_op, ext4_releasepage,
|
DEFINE_EVENT(ext4__folio_op, ext4_release_folio,
|
||||||
|
|
||||||
TP_PROTO(struct page *page),
|
TP_PROTO(struct inode *inode, struct folio *folio),
|
||||||
|
|
||||||
TP_ARGS(page)
|
TP_ARGS(inode, folio)
|
||||||
);
|
);
|
||||||
|
|
||||||
DECLARE_EVENT_CLASS(ext4_invalidate_folio_op,
|
DECLARE_EVENT_CLASS(ext4_invalidate_folio_op,
|
||||||
|
@ -1063,7 +1077,7 @@ TRACE_EVENT(ext4_mballoc_alloc,
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
|
TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
|
||||||
"result %u/%d/%u@%u blks %u grps %u cr %u flags %s "
|
"result %u/%d/%u@%u blks %u grps %u cr %s flags %s "
|
||||||
"tail %u broken %u",
|
"tail %u broken %u",
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||||
(unsigned long) __entry->ino,
|
(unsigned long) __entry->ino,
|
||||||
|
@ -1073,7 +1087,7 @@ TRACE_EVENT(ext4_mballoc_alloc,
|
||||||
__entry->goal_len, __entry->goal_logical,
|
__entry->goal_len, __entry->goal_logical,
|
||||||
__entry->result_group, __entry->result_start,
|
__entry->result_group, __entry->result_start,
|
||||||
__entry->result_len, __entry->result_logical,
|
__entry->result_len, __entry->result_logical,
|
||||||
__entry->found, __entry->groups, __entry->cr,
|
__entry->found, __entry->groups, show_criteria(__entry->cr),
|
||||||
show_mballoc_flags(__entry->flags), __entry->tail,
|
show_mballoc_flags(__entry->flags), __entry->tail,
|
||||||
__entry->buddy ? 1 << __entry->buddy : 0)
|
__entry->buddy ? 1 << __entry->buddy : 0)
|
||||||
);
|
);
|
||||||
|
|
Loading…
Reference in New Issue