From cb843a6f513a1a91c54951005e60bd9b95bdf973 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Oct 2008 12:30:02 -0400 Subject: [PATCH] Btrfs: O_DIRECT writes via buffered writes + invalidate This reworks the btrfs O_DIRECT write code a bit. It had always fallen back to buffered IO and done an invalidate, but needed to be updated for the data=ordered code. The invalidate wasn't actually removing pages because they were still inside an ordered extent. This also combines the O_DIRECT/O_SYNC paths where possible, and kicks off IO in the main btrfs_file_write loop to keep the pipe down to the disk full as we process long writes. Signed-off-by: Chris Mason --- fs/btrfs/file.c | 64 ++++++++++++++++++++++------------------- fs/btrfs/ordered-data.c | 3 +- fs/btrfs/ordered-data.h | 2 +- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3088a1184483..a03d1bbb19ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct page *pinned[2]; unsigned long first_index; unsigned long last_index; + int will_write; + + will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) || + (file->f_flags & O_DIRECT)); nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / (sizeof(struct page *))); @@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (ret) goto out; + if (will_write) { + btrfs_fdatawrite_range(inode->i_mapping, pos, + pos + write_bytes - 1, + WB_SYNC_NONE); + } else { + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + num_pages); + if (num_pages < + (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); + } + buf += write_bytes; count -= write_bytes; pos += write_bytes; num_written += write_bytes; - balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages); - if (num_pages < (root->leafsize
>> PAGE_CACHE_SHIFT) + 1) - btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); cond_resched(); } out: @@ -1023,36 +1036,29 @@ out_nolock: page_cache_release(pinned[1]); *ppos = pos; - if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { + if (num_written > 0 && will_write) { struct btrfs_trans_handle *trans; - err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, - start_pos + num_written -1, - WB_SYNC_NONE); - if (err < 0) + err = btrfs_wait_ordered_range(inode, start_pos, num_written); + if (err) num_written = err; - err = btrfs_wait_on_page_writeback_range(inode->i_mapping, - start_pos, start_pos + num_written - 1); - if (err < 0) - num_written = err; - - trans = btrfs_start_transaction(root, 1); - ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); - if (ret == 0) { - btrfs_sync_log(trans, root); - btrfs_end_transaction(trans, root); - } else { - btrfs_commit_transaction(trans, root); + if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_log_dentry_safe(trans, root, + file->f_dentry); + if (ret == 0) { + btrfs_sync_log(trans, root); + btrfs_end_transaction(trans, root); + } else { + btrfs_commit_transaction(trans, root); + } + } + if (file->f_flags & O_DIRECT) { + invalidate_mapping_pages(inode->i_mapping, + start_pos >> PAGE_CACHE_SHIFT, + (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } - } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { - do_sync_mapping_range(inode->i_mapping, start_pos, - start_pos + num_written - 1, - SYNC_FILE_RANGE_WRITE | - SYNC_FILE_RANGE_WAIT_AFTER); - invalidate_mapping_pages(inode->i_mapping, - start_pos >> PAGE_CACHE_SHIFT, - (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); } current->backing_dev_info = NULL; return num_written ? 
num_written : err; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index dcc1730dd837..2eb6caba57c2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode, /* * Used to wait on ordered extents across a large range of bytes. */ -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; u64 orig_end; @@ -451,6 +451,7 @@ again: (unsigned long long)orig_end); goto again; } + return 0; } /* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index fd45519f30a8..f50f8870a144 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); void btrfs_start_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry, int wait); -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); int btrfs_ordered_update_i_size(struct inode *inode,