ext4: add fsync batch tuning knobs
Add new mount options, min_batch_time and max_batch_time, which control how long the jbd2 layer should wait for additional filesystem operations to get batched with a synchronous write transaction. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
d7cfa4684d
commit
30773840c1
|
@ -283,6 +283,35 @@ delalloc (*) Deferring block allocation until write-out time.
|
|||
nodelalloc Disable delayed allocation. Blocks are allocated
|
||||
when data is copied from user to page cache.
|
||||
|
||||
max_batch_time=usec Maximum amount of time ext4 should wait for
|
||||
additional filesystem operations to be batched
|
||||
together with a synchronous write operation.
|
||||
Since a synchronous write operation is going to
|
||||
force a commit and then a wait for the I/O to
|
||||
complete, it doesn't cost much, and can be a
|
||||
huge throughput win, if we wait for a small amount
|
||||
of time to see if any other transactions can
|
||||
piggyback on the synchronous write. The
|
||||
algorithm used is designed to automatically tune
|
||||
for the speed of the disk, by measuring the
|
||||
amount of time (on average) that it takes to
|
||||
finish committing a transaction. Call this time
|
||||
the "commit time". If the time that the
|
||||
transaction has been running is less than the
|
||||
commit time, ext4 will try sleeping for the
|
||||
commit time to see if other operations will join
|
||||
the transaction. The commit time is capped by
|
||||
the max_batch_time, which defaults to 15000us
|
||||
(15ms). This optimization can be turned off
|
||||
entirely by setting max_batch_time to 0.
|
||||
|
||||
min_batch_time=usec This parameter sets the commit time (as
|
||||
described above) to be at least min_batch_time.
|
||||
It defaults to zero microseconds. Increasing
|
||||
this parameter may improve the throughput of
|
||||
multi-threaded, synchronous workloads on very
|
||||
fast disks, at the cost of increasing latency.
|
||||
|
||||
Data Mode
|
||||
=========
|
||||
There are 3 different data modes:
|
||||
|
|
|
@ -328,6 +328,7 @@ struct ext4_mount_options {
|
|||
uid_t s_resuid;
|
||||
gid_t s_resgid;
|
||||
unsigned long s_commit_interval;
|
||||
u32 s_min_batch_time, s_max_batch_time;
|
||||
#ifdef CONFIG_QUOTA
|
||||
int s_jquota_fmt;
|
||||
char *s_qf_names[MAXQUOTAS];
|
||||
|
@ -805,6 +806,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
|
|||
#define EXT4_DEFM_JMODE_ORDERED 0x0040
|
||||
#define EXT4_DEFM_JMODE_WBACK 0x0060
|
||||
|
||||
/*
|
||||
* Default journal batch times
|
||||
*/
|
||||
#define EXT4_DEF_MIN_BATCH_TIME 0
|
||||
#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
|
||||
|
||||
/*
|
||||
* Structure of a directory entry
|
||||
*/
|
||||
|
|
|
@ -74,6 +74,8 @@ struct ext4_sb_info {
|
|||
struct journal_s *s_journal;
|
||||
struct list_head s_orphan;
|
||||
unsigned long s_commit_interval;
|
||||
u32 s_max_batch_time;
|
||||
u32 s_min_batch_time;
|
||||
struct block_device *journal_bdev;
|
||||
#ifdef CONFIG_JBD2_DEBUG
|
||||
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
|
||||
|
|
|
@ -705,10 +705,19 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
|
|||
#endif
|
||||
if (!test_opt(sb, RESERVATION))
|
||||
seq_puts(seq, ",noreservation");
|
||||
if (sbi->s_commit_interval) {
|
||||
if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
|
||||
seq_printf(seq, ",commit=%u",
|
||||
(unsigned) (sbi->s_commit_interval / HZ));
|
||||
}
|
||||
if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
|
||||
seq_printf(seq, ",min_batch_time=%u",
|
||||
(unsigned) sbi->s_min_batch_time);
|
||||
}
|
||||
/* Report max_batch_time only when it differs from the default; print the max value, not the min. */
if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
|
||||
seq_printf(seq, ",max_batch_time=%u",
|
||||
(unsigned) sbi->s_max_batch_time);
|
||||
}
|
||||
|
||||
/*
|
||||
* We're changing the default of barrier mount option, so
|
||||
* let's always display its mount state so it's clear what its
|
||||
|
@ -874,7 +883,8 @@ enum {
|
|||
Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
|
||||
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
|
||||
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
|
||||
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
|
||||
Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
|
||||
Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
|
||||
Opt_journal_checksum, Opt_journal_async_commit,
|
||||
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
|
||||
Opt_data_err_abort, Opt_data_err_ignore,
|
||||
|
@ -913,6 +923,8 @@ static const match_table_t tokens = {
|
|||
{Opt_nobh, "nobh"},
|
||||
{Opt_bh, "bh"},
|
||||
{Opt_commit, "commit=%u"},
|
||||
{Opt_min_batch_time, "min_batch_time=%u"},
|
||||
{Opt_max_batch_time, "max_batch_time=%u"},
|
||||
{Opt_journal_update, "journal=update"},
|
||||
{Opt_journal_inum, "journal=%u"},
|
||||
{Opt_journal_dev, "journal_dev=%u"},
|
||||
|
@ -1131,6 +1143,22 @@ static int parse_options(char *options, struct super_block *sb,
|
|||
option = JBD2_DEFAULT_MAX_COMMIT_AGE;
|
||||
sbi->s_commit_interval = HZ * option;
|
||||
break;
|
||||
case Opt_max_batch_time:
|
||||
if (match_int(&args[0], &option))
|
||||
return 0;
|
||||
if (option < 0)
|
||||
return 0;
|
||||
if (option == 0)
|
||||
option = EXT4_DEF_MAX_BATCH_TIME;
|
||||
sbi->s_max_batch_time = option;
|
||||
break;
|
||||
case Opt_min_batch_time:
|
||||
if (match_int(&args[0], &option))
|
||||
return 0;
|
||||
if (option < 0)
|
||||
return 0;
|
||||
sbi->s_min_batch_time = option;
|
||||
break;
|
||||
case Opt_data_journal:
|
||||
data_opt = EXT4_MOUNT_JOURNAL_DATA;
|
||||
goto datacheck;
|
||||
|
@ -1979,6 +2007,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
|
||||
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
|
||||
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
|
||||
sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
|
||||
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
|
||||
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
|
||||
|
||||
set_opt(sbi->s_mount_opt, RESERVATION);
|
||||
set_opt(sbi->s_mount_opt, BARRIER);
|
||||
|
@ -2524,11 +2555,9 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
|
|||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
|
||||
if (sbi->s_commit_interval)
|
||||
journal->j_commit_interval = sbi->s_commit_interval;
|
||||
/* We could also set up an ext4-specific default for the commit
|
||||
* interval here, but for now we'll just fall back to the jbd
|
||||
* default. */
|
||||
journal->j_commit_interval = sbi->s_commit_interval;
|
||||
journal->j_min_batch_time = sbi->s_min_batch_time;
|
||||
journal->j_max_batch_time = sbi->s_max_batch_time;
|
||||
|
||||
spin_lock(&journal->j_state_lock);
|
||||
if (test_opt(sb, BARRIER))
|
||||
|
@ -3042,6 +3071,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
|||
old_opts.s_resuid = sbi->s_resuid;
|
||||
old_opts.s_resgid = sbi->s_resgid;
|
||||
old_opts.s_commit_interval = sbi->s_commit_interval;
|
||||
old_opts.s_min_batch_time = sbi->s_min_batch_time;
|
||||
old_opts.s_max_batch_time = sbi->s_max_batch_time;
|
||||
#ifdef CONFIG_QUOTA
|
||||
old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
|
||||
for (i = 0; i < MAXQUOTAS; i++)
|
||||
|
@ -3178,6 +3209,8 @@ restore_opts:
|
|||
sbi->s_resuid = old_opts.s_resuid;
|
||||
sbi->s_resgid = old_opts.s_resgid;
|
||||
sbi->s_commit_interval = old_opts.s_commit_interval;
|
||||
sbi->s_min_batch_time = old_opts.s_min_batch_time;
|
||||
sbi->s_max_batch_time = old_opts.s_max_batch_time;
|
||||
#ifdef CONFIG_QUOTA
|
||||
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
|
||||
for (i = 0; i < MAXQUOTAS; i++) {
|
||||
|
|
|
@ -964,6 +964,8 @@ static journal_t * journal_init_common (void)
|
|||
spin_lock_init(&journal->j_state_lock);
|
||||
|
||||
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
|
||||
journal->j_min_batch_time = 0;
|
||||
journal->j_max_batch_time = 15000; /* 15ms */
|
||||
|
||||
/* The journal is marked for error until we succeed with recovery! */
|
||||
journal->j_flags = JBD2_ABORT;
|
||||
|
|
|
@ -1255,8 +1255,10 @@ int jbd2_journal_stop(handle_t *handle)
|
|||
trans_time = ktime_to_ns(ktime_sub(ktime_get(),
|
||||
transaction->t_start_time));
|
||||
|
||||
commit_time = max_t(u64, commit_time,
|
||||
1000*journal->j_min_batch_time);
|
||||
commit_time = min_t(u64, commit_time,
|
||||
1000*jiffies_to_usecs(1));
|
||||
1000*journal->j_max_batch_time);
|
||||
|
||||
if (trans_time < commit_time) {
|
||||
ktime_t expires = ktime_add_ns(ktime_get(),
|
||||
|
|
|
@ -956,6 +956,14 @@ struct journal_s
|
|||
*/
|
||||
u64 j_average_commit_time;
|
||||
|
||||
/*
|
||||
* minimum and maximum times that we should wait for
|
||||
* additional filesystem operations to get batched into a
|
||||
* synchronous handle in microseconds
|
||||
*/
|
||||
u32 j_min_batch_time;
|
||||
u32 j_max_batch_time;
|
||||
|
||||
/* This function is called when a transaction is closed */
|
||||
void (*j_commit_callback)(journal_t *,
|
||||
transaction_t *);
|
||||
|
|
Loading…
Reference in New Issue