diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 560f88dc7090..0c5086db8352 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -139,8 +139,16 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata Setting it to very large values will improve performance. -barrier=1 This enables/disables barriers. barrier=0 disables - it, barrier=1 enables it. +barrier=<0|1(*)> This enables/disables the use of write barriers in + the jbd code. barrier=0 disables, barrier=1 enables. + This also requires an IO stack which can support + barriers, and if jbd gets an error on a barrier + write, it will disable again with a warning. + Write barriers enforce proper on-disk ordering + of journal commits, making volatile disk write caches + safe to use, at some performance penalty. If + your disks are battery-backed in one way or another, + disabling barriers may safely improve performance. orlov (*) This enables the new Orlov block allocator. It is enabled by default. diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 30494c5da843..9cc80b9cc8d8 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -43,6 +43,46 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, } +static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, + ext4_group_t block_group) +{ + ext4_group_t actual_group; + ext4_get_group_no_and_offset(sb, block, &actual_group, 0); + if (actual_group == block_group) + return 1; + return 0; +} + +static int ext4_group_used_meta_blocks(struct super_block *sb, + ext4_group_t block_group) +{ + ext4_fsblk_t tmp; + struct ext4_sb_info *sbi = EXT4_SB(sb); + /* block bitmap, inode bitmap, and inode table blocks */ + int used_blocks = sbi->s_itb_per_group + 2; + + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { + struct ext4_group_desc *gdp; + struct buffer_head *bh; + + gdp = ext4_get_group_desc(sb, block_group, &bh); + if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), + block_group)) + used_blocks--; + + if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), + block_group)) + used_blocks--; + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!ext4_block_in_group(sb, tmp, block_group)) + used_blocks -= 1; + } + } + return used_blocks; +} /* Initializes an uninitialized block bitmap if given, and returns the * number of blocks free in the group. */ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, @@ -105,20 +145,34 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, free_blocks = group_blocks - bit_max; if (bh) { - ext4_fsblk_t start; + ext4_fsblk_t start, tmp; + int flex_bg = 0; for (bit = 0; bit < bit_max; bit++) ext4_set_bit(bit, bh->b_data); start = ext4_group_first_block_no(sb, block_group); - /* Set bits for block and inode bitmaps, and inode table */ - ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data); - ext4_set_bit(ext4_inode_bitmap(sb, gdp) - start, bh->b_data); - for (bit = (ext4_inode_table(sb, gdp) - start), - bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++) - ext4_set_bit(bit, bh->b_data); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_FLEX_BG)) + flex_bg = 1; + /* Set bits for block and inode bitmaps, and inode table */ + tmp = ext4_block_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_bitmap(sb, gdp); + if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + + tmp = ext4_inode_table(sb, gdp); + for (; tmp < ext4_inode_table(sb, gdp) + + sbi->s_itb_per_group; tmp++) { + if (!flex_bg || + ext4_block_in_group(sb, tmp, block_group)) + ext4_set_bit(tmp - start, bh->b_data); + } /* * Also if the number of blocks within the group is * less than the blocksize * 8 ( which is the size @@ -126,8 +180,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); } - - return free_blocks - sbi->s_itb_per_group - 2; + return free_blocks - ext4_group_used_meta_blocks(sb, block_group); } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 873ad9b3418c..c9900aade150 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2745,8 +2745,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, sbi = EXT4_SB(sb); es = sbi->s_es; - ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, - gdp->bg_free_blocks_count); err = -EIO; bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group); @@ -2762,6 +2760,9 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!gdp) goto out_err; + ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, + gdp->bg_free_blocks_count); + err = ext4_journal_get_write_access(handle, gdp_bh); if (err) goto out_err; @@ -3094,8 +3095,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa) { - unsigned len = ac->ac_o_ex.fe_len; - + unsigned int len = ac->ac_o_ex.fe_len; ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, &ac->ac_b_ex.fe_group, &ac->ac_b_ex.fe_start); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9f086a6a472b..9ecb92f68543 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -563,7 +563,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count % + EXT4_ADDR_PER_BLOCK(sb)); end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 09d9359c8055..cb96f127c366 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -671,6 +671,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) unsigned long def_mount_opts; struct super_block *sb = vfs->mnt_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); + journal_t *journal = sbi->s_journal; struct ext4_super_block *es = sbi->s_es; def_mount_opts = le32_to_cpu(es->s_default_mount_opts); @@ -729,8 +730,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",commit=%u", (unsigned) (sbi->s_commit_interval / HZ)); } - if (test_opt(sb, BARRIER)) - seq_puts(seq, ",barrier=1"); + /* + * We're changing the default of barrier mount option, so + * let's always display its mount state so it's clear what its + * status is. + */ + seq_puts(seq, ",barrier="); + seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) + seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); if (!test_opt(sb, EXTENTS)) @@ -1907,6 +1915,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_resgid = le16_to_cpu(es->s_def_resgid); set_opt(sbi->s_mount_opt, RESERVATION); + set_opt(sbi->s_mount_opt, BARRIER); /* * turn on extents feature by default in ext4 filesystem @@ -2189,6 +2198,29 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3; + if (!(sb->s_flags & MS_RDONLY) && + EXT4_SB(sb)->s_journal->j_failed_commit) { + printk(KERN_CRIT "EXT4-fs error (device %s): " + "ext4_fill_super: Journal transaction " + "%u is corrupt\n", sb->s_id, + EXT4_SB(sb)->s_journal->j_failed_commit); + if (test_opt (sb, ERRORS_RO)) { + printk (KERN_CRIT + "Mounting filesystem read-only\n"); + sb->s_flags |= MS_RDONLY; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + } + if (test_opt(sb, ERRORS_PANIC)) { + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_commit_super(sb, es, 1); + printk(KERN_CRIT + "EXT4-fs (device %s): mount failed\n", + sb->s_id); + goto failed_mount4; + } + } } else if (journal_inum) { if (ext4_create_journal(sb, es, journal_inum)) goto failed_mount3; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 4d99685fdce4..a2ed72f7ceee 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -168,6 +168,7 @@ static int journal_submit_commit_record(journal_t *journal, spin_unlock(&journal->j_state_lock); /* And try again, without the barrier */ + lock_buffer(bh); set_buffer_uptodate(bh); set_buffer_dirty(bh); ret = submit_bh(WRITE, bh); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 5d0405a9e7ca..058f50f65b76 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, obh->b_size); } + put_bh(obh); } return 0; } @@ -610,9 +611,8 @@ static int do_one_pass(journal_t *journal, chksum_err = chksum_seen = 0; if (info->end_transaction) { - printk(KERN_ERR "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID - 1); + journal->j_failed_commit = + info->end_transaction; brelse(bh); break; } @@ -643,10 +643,8 @@ static int do_one_pass(journal_t *journal, if (!JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ - printk(KERN_ERR - "JBD: Transaction %u " - "found to be corrupt.\n", - next_commit_ID); + journal->j_failed_commit = + next_commit_ID; brelse(bh); break; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 05e2b307161a..d147f0f90360 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -919,6 +919,9 @@ struct journal_s struct proc_dir_entry *j_proc_entry; struct transaction_stats_s j_stats; + /* Failed journal commit ID */ + unsigned int j_failed_commit; + /* * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here