From a64c8610bd3b753c6aff58f51c04cdf0ae478c18 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 27 Mar 2009 22:14:10 -0400 Subject: [PATCH 1/4] block_write_full_page: Use synchronous writes for WBC_SYNC_ALL writebacks When doing synchronous writes because wbc->sync_mode is set to WBC_SYNC_ALL, send the write request using WRITE_SYNC, so that we don't unduly block system calls such as fsync(). Signed-off-by: "Theodore Ts'o" Acked-by: Jan Kara --- fs/buffer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 891e1c78e4f1..e7ebd95e0c68 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1714,6 +1714,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; + int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); BUG_ON(!PageLocked(page)); @@ -1805,7 +1806,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); + submit_bh(write_op, bh); nr_underway++; } bh = next; @@ -1859,7 +1860,7 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh(WRITE, bh); + submit_bh(write_op, bh); nr_underway++; } bh = next; From 512a004382f2c60d5c4f855476ba965adc00250c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 27 Mar 2009 22:14:27 -0400 Subject: [PATCH 2/4] ext3: Use WRITE_SYNC for commits which are caused by fsync() If a commit is triggered by fsync(), set a flag indicating the journal blocks associated with the transaction should be flushed out using WRITE_SYNC. Signed-off-by: "Theodore Ts'o" Acked-by: Jan Kara --- fs/jbd/commit.c | 23 +++++++++++++++-------- fs/jbd/transaction.c | 2 ++ include/linux/jbd.h | 5 +++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 3fbffb1ea714..f8077b9c8981 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -171,14 +172,15 @@ static int journal_write_commit_record(journal_t *journal, return (ret == -EIO); } -static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) +static void journal_do_submit_data(struct buffer_head **wbuf, int bufs, + int write_op) { int i; for (i = 0; i < bufs; i++) { wbuf[i]->b_end_io = end_buffer_write_sync; /* We use-up our safety reference in submit_bh() */ - submit_bh(WRITE, wbuf[i]); + submit_bh(write_op, wbuf[i]); } } @@ -186,7 +188,8 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) * Submit all the data buffers to disk */ static int journal_submit_data_buffers(journal_t *journal, - transaction_t *commit_transaction) + transaction_t *commit_transaction, + int write_op) { struct journal_head *jh; struct buffer_head *bh; @@ -225,7 +228,7 @@ write_out_data: BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); /* Write out all data to prevent deadlocks */ - journal_do_submit_data(wbuf, bufs); + journal_do_submit_data(wbuf, bufs, write_op); bufs = 0; lock_buffer(bh); spin_lock(&journal->j_list_lock); @@ -256,7 +259,7 @@ write_out_data: jbd_unlock_bh_state(bh); if (bufs == journal->j_wbufsize) { spin_unlock(&journal->j_list_lock); - journal_do_submit_data(wbuf, bufs); + journal_do_submit_data(wbuf, bufs, write_op); bufs = 0; goto write_out_data; } @@ -286,7 +289,7 @@ write_out_data: } } spin_unlock(&journal->j_list_lock); - journal_do_submit_data(wbuf, bufs); + journal_do_submit_data(wbuf, bufs, write_op); return err; } @@ -315,6 +318,7 @@ void journal_commit_transaction(journal_t *journal) int first_tag = 0; int tag_flag; int i; + int write_op = WRITE; /* * First job: lock down the current transaction and wait for @@ -347,6 +351,8 @@ void journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; + if (commit_transaction->t_synchronous_commit) + write_op = WRITE_SYNC; spin_lock(&commit_transaction->t_handle_lock); while (commit_transaction->t_updates) { DEFINE_WAIT(wait); @@ -431,7 +437,8 @@ void journal_commit_transaction(journal_t *journal) * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ - err = journal_submit_data_buffers(journal, commit_transaction); + err = journal_submit_data_buffers(journal, commit_transaction, + write_op); /* * Wait for all previously submitted IO to complete. @@ -660,7 +667,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(WRITE, bh); + submit_bh(write_op, bh); } cond_resched(); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index e6a117431277..ed886e6db399 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1440,6 +1440,8 @@ int journal_stop(handle_t *handle) } } + if (handle->h_sync) + transaction->t_synchronous_commit = 1; current->journal_info = NULL; spin_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 64246dce5663..2c6943152c21 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -552,6 +552,11 @@ struct transaction_s */ int t_handle_count; + /* + * This transaction is being forced and some process is + * waiting for it to finish. + */ + int t_synchronous_commit:1; }; /** From f7ab34ea723ed304b19698efca85d6f40cecd99b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 3 Apr 2009 01:34:35 -0400 Subject: [PATCH 3/4] ext3: Add replace-on-truncate hueristics for data=writeback mode In data=writeback mode, start an asynchronous flush when closing a file which had been previously truncated down to zero. This lowers the probability of data loss in the case of applications that attempt to replace a file using truncate. Signed-off-by: "Theodore Ts'o" --- fs/ext3/file.c | 4 ++++ fs/ext3/inode.c | 3 +++ include/linux/ext3_fs.h | 1 + 3 files changed, 8 insertions(+) diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 3be1e0689c9a..4a04cbb1c231 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -33,6 +33,10 @@ */ static int ext3_release_file (struct inode * inode, struct file * filp) { + if (EXT3_I(inode)->i_state & EXT3_STATE_FLUSH_ON_CLOSE) { + filemap_flush(inode->i_mapping); + EXT3_I(inode)->i_state &= ~EXT3_STATE_FLUSH_ON_CLOSE; + } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5fa453b49a64..0f5bca0d82fc 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2346,6 +2346,9 @@ void ext3_truncate(struct inode *inode) if (!ext3_can_truncate(inode)) return; + if (inode->i_size == 0 && ext3_should_writeback_data(inode)) + ei->i_state |= EXT3_STATE_FLUSH_ON_CLOSE; + /* * We have to lock the EOF page here, because lock_page() nests * outside journal_start(). diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index dd495b8c3091..d2630c56cb34 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -208,6 +208,7 @@ static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT3_STATE_FLUSH_ON_CLOSE 0x00000008 /* Used to pass group descriptor data when online resize is done */ struct ext3_new_group_input { From e7c8f5079ed9ec9e6eb1abe3defc5fb4ebfdf1cb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 3 Apr 2009 01:34:49 -0400 Subject: [PATCH 4/4] ext3: Add replace-on-rename hueristics for data=writeback mode In data=writeback mode, start an asynchronous flush when renaming a file on top of an already-existing file. This lowers the probability of data loss in the case of applications that attempt to replace a file via using rename(). Signed-off-by: "Theodore Ts'o" --- fs/ext3/namei.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4db4ffa1edad..ab98a66ab8c7 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2265,7 +2265,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; struct ext3_dir_entry_2 * old_de, * new_de; - int retval; + int retval, flush_file = 0; old_bh = new_bh = dir_bh = NULL; @@ -2401,6 +2401,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, ext3_mark_inode_dirty(handle, new_inode); if (!new_inode->i_nlink) ext3_orphan_add(handle, new_inode); + if (ext3_should_writeback_data(new_inode)) + flush_file = 1; } retval = 0; @@ -2409,6 +2411,8 @@ end_rename: brelse (old_bh); brelse (new_bh); ext3_journal_stop(handle); + if (retval == 0 && flush_file) + filemap_flush(old_inode->i_mapping); return retval; }