ext4: Add delayed allocation support in data=writeback mode
Updated with fixes from Mingming Cao <cmm@us.ibm.com> to unlock and
release the page from the page cache if the delalloc write_begin failed,
and to properly handle preallocated blocks. Also added a fix to clear
buffer_delay in block_write_full_page() after allocating a delayed buffer.

Updated with fixes from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
to update i_disksize properly and to add bmap support for delayed
allocation.

Updated with a fix from Valerie Clement <valerie.clement@bull.net> to
avoid filesystem corruption when the filesystem is mounted with the
delalloc option and blocksize < pagesize.

Signed-off-by: Alex Tomas <alex@clusterfs.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
parent 29a814d2ee
commit 64769240bd
@@ -536,6 +536,7 @@ do { \
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION		0x2000000 /* i_version support */
 #define EXT4_MOUNT_MBALLOC		0x4000000 /* Buddy allocation support */
+#define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H
 #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
fs/ext4/inode.c (699 changed lines)
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
@@ -46,6 +47,8 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 						new_size);
 }
 
+static void ext4_invalidatepage(struct page *page, unsigned long offset);
+
 /*
  * Test whether an inode is a fast symlink.
  */
@@ -1407,6 +1410,669 @@ static int ext4_journalled_write_end(struct file *file,
     return ret ? ret : copied;
 }
 
+/*
+ * Delayed allocation stuff
+ */
+
+struct mpage_da_data {
+    struct inode *inode;
+    struct buffer_head lbh;                  /* extent of blocks */
+    unsigned long first_page, next_page;     /* extent of pages */
+    get_block_t *get_block;
+    struct writeback_control *wbc;
+};
+
+/*
+ * mpage_da_submit_io - walks through the extent of pages and tries to
+ * write them with __mpage_writepage()
+ *
+ * @mpd->inode: inode
+ * @mpd->first_page: first page of the extent
+ * @mpd->next_page: page after the last page of the extent
+ * @mpd->get_block: the filesystem's block mapper function
+ *
+ * By the time mpage_da_submit_io() is called we expect all blocks
+ * to be allocated. This may be wrong if allocation failed.
+ *
+ * As pages are already locked by write_cache_pages(), we can't use it
+ */
+static int mpage_da_submit_io(struct mpage_da_data *mpd)
+{
+    struct address_space *mapping = mpd->inode->i_mapping;
+    struct mpage_data mpd_pp = {
+        .bio = NULL,
+        .last_block_in_bio = 0,
+        .get_block = mpd->get_block,
+        .use_writepage = 1,
+    };
+    int ret = 0, err, nr_pages, i;
+    unsigned long index, end;
+    struct pagevec pvec;
+
+    BUG_ON(mpd->next_page <= mpd->first_page);
+
+    pagevec_init(&pvec, 0);
+    index = mpd->first_page;
+    end = mpd->next_page - 1;
+
+    while (index <= end) {
+        /* XXX: optimize tail */
+        nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+        if (nr_pages == 0)
+            break;
+        for (i = 0; i < nr_pages; i++) {
+            struct page *page = pvec.pages[i];
+
+            index = page->index;
+            if (index > end)
+                break;
+            index++;
+
+            err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
+
+            /*
+             * In error case, we have to continue because
+             * remaining pages are still locked
+             * XXX: unlock and re-dirty them?
+             */
+            if (ret == 0)
+                ret = err;
+        }
+        pagevec_release(&pvec);
+    }
+    if (mpd_pp.bio)
+        mpage_bio_submit(WRITE, mpd_pp.bio);
+
+    return ret;
+}
+
+/*
+ * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
+ *
+ * @mpd->inode - inode to walk through
+ * @exbh->b_blocknr - first block on a disk
+ * @exbh->b_size - amount of space in bytes
+ * @logical - first logical block to start assignment with
+ *
+ * the function goes through all passed space and puts actual disk
+ * block numbers into buffer heads, dropping BH_Delay
+ */
+static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
+                                 struct buffer_head *exbh)
+{
+    struct inode *inode = mpd->inode;
+    struct address_space *mapping = inode->i_mapping;
+    int blocks = exbh->b_size >> inode->i_blkbits;
+    sector_t pblock = exbh->b_blocknr, cur_logical;
+    struct buffer_head *head, *bh;
+    unsigned long index, end;
+    struct pagevec pvec;
+    int nr_pages, i;
+
+    index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+    end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+    cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+    pagevec_init(&pvec, 0);
+
+    while (index <= end) {
+        /* XXX: optimize tail */
+        nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+        if (nr_pages == 0)
+            break;
+        for (i = 0; i < nr_pages; i++) {
+            struct page *page = pvec.pages[i];
+
+            index = page->index;
+            if (index > end)
+                break;
+            index++;
+
+            BUG_ON(!PageLocked(page));
+            BUG_ON(PageWriteback(page));
+            BUG_ON(!page_has_buffers(page));
+
+            bh = page_buffers(page);
+            head = bh;
+
+            /* skip blocks out of the range */
+            do {
+                if (cur_logical >= logical)
+                    break;
+                cur_logical++;
+            } while ((bh = bh->b_this_page) != head);
+
+            do {
+                if (cur_logical >= logical + blocks)
+                    break;
+
+                if (buffer_delay(bh)) {
+                    bh->b_blocknr = pblock;
+                    clear_buffer_delay(bh);
+                } else if (buffer_mapped(bh)) {
+                    BUG_ON(bh->b_blocknr != pblock);
+                }
+
+                cur_logical++;
+                pblock++;
+            } while ((bh = bh->b_this_page) != head);
+        }
+        pagevec_release(&pvec);
+    }
+}
+
+
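A note on the index arithmetic above: logical block numbers are converted to page-cache indexes by shifting with (PAGE_CACHE_SHIFT - i_blkbits). A standalone model of the same math follows — a minimal sketch, assuming 4096-byte pages and 1024-byte blocks (the blocksize < pagesize case the commit message mentions); the names are illustrative, not kernel API:

#include <stdio.h>

#define PAGE_SHIFT_DEMO 12   /* stand-in for PAGE_CACHE_SHIFT: 4096-byte pages */
#define BLKBITS_DEMO    10   /* stand-in for inode->i_blkbits: 1024-byte blocks */

int main(void)
{
    unsigned long logical = 13;    /* first logical block of an extent */
    unsigned long blocks  = 9;     /* extent length in blocks */
    unsigned long shift   = PAGE_SHIFT_DEMO - BLKBITS_DEMO; /* 4 blocks per page */

    unsigned long index = logical >> shift;                /* first page covering the extent */
    unsigned long end   = (logical + blocks - 1) >> shift; /* last page covering the extent */
    unsigned long cur_logical = index << shift;            /* first block of that first page */

    printf("pages %lu..%lu, walk starts at block %lu (extent starts at %lu)\n",
           index, end, cur_logical, logical);
    /* cur_logical is page-aligned, so the skip loop in mpage_put_bnr_to_bhs()
     * advances from it up to 'logical' before assigning block numbers. */
    return 0;
}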
+/*
+ * __unmap_underlying_blocks - just a helper function to unmap
+ * set of blocks described by @bh
+ */
+static inline void __unmap_underlying_blocks(struct inode *inode,
+                                             struct buffer_head *bh)
+{
+    struct block_device *bdev = inode->i_sb->s_bdev;
+    int blocks, i;
+
+    blocks = bh->b_size >> inode->i_blkbits;
+    for (i = 0; i < blocks; i++)
+        unmap_underlying_metadata(bdev, bh->b_blocknr + i);
+}
+
+/*
+ * mpage_da_map_blocks - go through given space
+ *
+ * @mpd->lbh - bh describing space
+ * @mpd->get_block - the filesystem's block mapper function
+ *
+ * The function skips space we know is already mapped to disk blocks.
+ *
+ * The function ignores errors ->get_block() returns, thus real
+ * error handling is postponed to __mpage_writepage()
+ */
+static void mpage_da_map_blocks(struct mpage_da_data *mpd)
+{
+    struct buffer_head *lbh = &mpd->lbh;
+    int err = 0, remain = lbh->b_size;
+    sector_t next = lbh->b_blocknr;
+    struct buffer_head new;
+
+    /*
+     * We consider only non-mapped and non-allocated blocks
+     */
+    if (buffer_mapped(lbh) && !buffer_delay(lbh))
+        return;
+
+    while (remain) {
+        new.b_state = lbh->b_state;
+        new.b_blocknr = 0;
+        new.b_size = remain;
+        err = mpd->get_block(mpd->inode, next, &new, 1);
+        if (err) {
+            /*
+             * Rather than implement our own error handling
+             * here, we just leave remaining blocks
+             * unallocated and try again with ->writepage()
+             */
+            break;
+        }
+        BUG_ON(new.b_size == 0);
+
+        if (buffer_new(&new))
+            __unmap_underlying_blocks(mpd->inode, &new);
+
+        /*
+         * If blocks are delayed marked, we need to
+         * put actual blocknr and drop delayed bit
+         */
+        if (buffer_delay(lbh))
+            mpage_put_bnr_to_bhs(mpd, next, &new);
+
+        /* go for the remaining blocks */
+        next += new.b_size >> mpd->inode->i_blkbits;
+        remain -= new.b_size;
+    }
+}
+
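The loop above asks ->get_block() for the whole remaining extent and lets it map as much as it can per call, advancing by whatever came back. A user-space model of that chunked-mapping pattern — counting blocks rather than bytes for brevity; demo_get_block is a made-up allocator, not a kernel function:

#include <stdio.h>

/* Hypothetical allocator: maps at most 4 contiguous blocks per call. */
static int demo_get_block(unsigned long next, unsigned long want,
                          unsigned long *mapped)
{
    (void)next;
    *mapped = want > 4 ? 4 : want;
    return 0;
}

int main(void)
{
    unsigned long next = 100, remain = 10, got;

    while (remain) {
        if (demo_get_block(next, remain, &got))
            break;  /* leave the rest unallocated; ->writepage() retries later */
        printf("mapped [%lu, +%lu)\n", next, got);
        next += got;    /* go for the remaining blocks */
        remain -= got;
    }
    return 0;
}

Running it maps [100, +4), [104, +4), [108, +2): three calls cover the ten-block extent, mirroring how the kernel loop tolerates allocators that map less than requested.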
+#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
+
+/*
+ * mpage_add_bh_to_extent - try to add one more block to extent of blocks
+ *
+ * @mpd->lbh - extent of blocks
+ * @logical - logical number of the block in the file
+ * @bh - bh of the block (used to access block's state)
+ *
+ * the function is used to collect contig. blocks in same state
+ */
+static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
+                                   sector_t logical, struct buffer_head *bh)
+{
+    struct buffer_head *lbh = &mpd->lbh;
+    sector_t next;
+
+    next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
+
+    /*
+     * First block in the extent
+     */
+    if (lbh->b_size == 0) {
+        lbh->b_blocknr = logical;
+        lbh->b_size = bh->b_size;
+        lbh->b_state = bh->b_state & BH_FLAGS;
+        return;
+    }
+
+    /*
+     * Can we merge the block to our big extent?
+     */
+    if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
+        lbh->b_size += bh->b_size;
+        return;
+    }
+
+    /*
+     * We couldn't merge the block to our extent, so we
+     * need to flush current extent and start new one
+     */
+    mpage_da_map_blocks(mpd);
+
+    /*
+     * Now start a new extent
+     */
+    lbh->b_size = bh->b_size;
+    lbh->b_state = bh->b_state & BH_FLAGS;
+    lbh->b_blocknr = logical;
+}
+
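The merge test is the heart of the extent collector: a block joins the current extent only if it is logically contiguous and carries the same masked buffer state. A minimal user-space model of that decision — all names illustrative, not kernel API:

#include <stdio.h>

#define BH_FLAGS_DEMO 0x7   /* demo mask: uptodate | mapped | delay */

struct extent_demo {
    unsigned long start;    /* first logical block */
    unsigned long size;     /* length in blocks (0 = empty extent) */
    unsigned state;         /* masked buffer state */
};

/* Returns 1 if the block was merged, 0 if the caller must flush and restart. */
static int try_merge(struct extent_demo *ext, unsigned long logical, unsigned state)
{
    state &= BH_FLAGS_DEMO;
    if (ext->size == 0) {               /* first block of a new extent */
        ext->start = logical;
        ext->size = 1;
        ext->state = state;
        return 1;
    }
    if (logical == ext->start + ext->size && state == ext->state) {
        ext->size++;                    /* contiguous and same state: merge */
        return 1;
    }
    return 0;                           /* discontiguity or state change */
}

int main(void)
{
    struct extent_demo ext = {0};
    unsigned long blocks[] = {10, 11, 12, 14};  /* 14 breaks contiguity */

    for (unsigned i = 0; i < 4; i++)
        if (!try_merge(&ext, blocks[i], 0x7)) {
            printf("flush extent [%lu, +%lu), start new\n", ext.start, ext.size);
            ext.size = 0;
            try_merge(&ext, blocks[i], 0x7);
        }
    printf("final extent [%lu, +%lu)\n", ext.start, ext.size);
    return 0;
}

Blocks 10..12 accumulate into one extent; block 14 forces a flush (the kernel's mpage_da_map_blocks() call) before a new extent starts at 14.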
+/*
+ * __mpage_da_writepage - finds extent of pages and blocks
+ *
+ * @page: page to consider
+ * @wbc: not used, we just follow rules
+ * @data: context
+ *
+ * The function finds extents of pages and scans them for all blocks.
+ */
+static int __mpage_da_writepage(struct page *page,
+                                struct writeback_control *wbc, void *data)
+{
+    struct mpage_da_data *mpd = data;
+    struct inode *inode = mpd->inode;
+    struct buffer_head *bh, *head, fake;
+    sector_t logical;
+
+    /*
+     * Can we merge this page to current extent?
+     */
+    if (mpd->next_page != page->index) {
+        /*
+         * Nope, we can't. So, we map non-allocated blocks
+         * and start IO on them using __mpage_writepage()
+         */
+        if (mpd->next_page != mpd->first_page) {
+            mpage_da_map_blocks(mpd);
+            mpage_da_submit_io(mpd);
+        }
+
+        /*
+         * Start next extent of pages ...
+         */
+        mpd->first_page = page->index;
+
+        /*
+         * ... and blocks
+         */
+        mpd->lbh.b_size = 0;
+        mpd->lbh.b_state = 0;
+        mpd->lbh.b_blocknr = 0;
+    }
+
+    mpd->next_page = page->index + 1;
+    logical = (sector_t) page->index <<
+              (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+    if (!page_has_buffers(page)) {
+        /*
+         * There are no buffer heads attached yet (mmap?),
+         * we treat the page as full of dirty blocks
+         */
+        bh = &fake;
+        bh->b_size = PAGE_CACHE_SIZE;
+        bh->b_state = 0;
+        set_buffer_dirty(bh);
+        set_buffer_uptodate(bh);
+        mpage_add_bh_to_extent(mpd, logical, bh);
+    } else {
+        /*
+         * Page with regular buffer heads, just add all dirty ones
+         */
+        head = page_buffers(page);
+        bh = head;
+        do {
+            BUG_ON(buffer_locked(bh));
+            if (buffer_dirty(bh))
+                mpage_add_bh_to_extent(mpd, logical, bh);
+            logical++;
+        } while ((bh = bh->b_this_page) != head);
+    }
+
+    return 0;
+}
+
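One detail in __mpage_da_writepage() worth a worked example: the (sector_t) cast before the shift. page->index is an unsigned long, so on a 32-bit box the logical block number must be widened before shifting or the result can wrap. A sketch using fixed-width types to make the truncation visible — the values are chosen only to trigger it, not taken from the patch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t page_index = 0x40000000;   /* a page index near the top of 32 bits */
    unsigned shift = 12 - 10;           /* PAGE_CACHE_SHIFT - i_blkbits */

    uint32_t narrow = page_index << shift;              /* shifted in 32 bits: wraps to 0 */
    uint64_t wide = (uint64_t) page_index << shift;     /* widened first, as the code does */

    printf("narrow=0x%x wide=0x%llx\n", narrow, (unsigned long long) wide);
    return 0;
}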
+/*
+ * mpage_da_writepages - walk the list of dirty pages of the given
+ * address space, allocate non-allocated blocks, map newly-allocated
+ * blocks to existing bhs and issue IO on them
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @get_block: the filesystem's block mapper function.
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * In order to avoid duplication of logic that deals with partial pages,
+ * multiple bios per page, etc, we find non-allocated blocks, allocate
+ * them with minimal calls to ->get_block() and re-use __mpage_writepage()
+ *
+ * It's important that we call __mpage_writepage() only once for each
+ * involved page, otherwise we'd have to implement more complicated logic
+ * to deal with pages w/o PG_lock or w/ PG_writeback and so on.
+ *
+ * See comments to mpage_writepages()
+ */
+static int mpage_da_writepages(struct address_space *mapping,
+                               struct writeback_control *wbc,
+                               get_block_t get_block)
+{
+    struct mpage_da_data mpd;
+    int ret;
+
+    if (!get_block)
+        return generic_writepages(mapping, wbc);
+
+    mpd.wbc = wbc;
+    mpd.inode = mapping->host;
+    mpd.lbh.b_size = 0;
+    mpd.lbh.b_state = 0;
+    mpd.lbh.b_blocknr = 0;
+    mpd.first_page = 0;
+    mpd.next_page = 0;
+    mpd.get_block = get_block;
+
+    ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
+
+    /*
+     * Handle last extent of pages
+     */
+    if (mpd.next_page != mpd.first_page) {
+        mpage_da_map_blocks(&mpd);
+        mpage_da_submit_io(&mpd);
+    }
+
+    return ret;
+}
+
+/*
+ * this is a special callback for ->write_begin() only
+ * its intention is to return a mapped block or reserve space
+ */
+static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create)
+{
+    int ret = 0;
+
+    BUG_ON(create == 0);
+    BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+
+    /*
+     * first, we need to know whether the block is allocated already;
+     * preallocated blocks are unmapped but should be treated
+     * the same as allocated blocks.
+     */
+    ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0);
+    if (ret == 0) {
+        /* the block isn't allocated yet, let's reserve space */
+        /* XXX: call reservation here */
+        /*
+         * XXX: __block_prepare_write() unmaps passed block,
+         * is it OK?
+         */
+        map_bh(bh_result, inode->i_sb, 0);
+        set_buffer_new(bh_result);
+        set_buffer_delay(bh_result);
+    } else if (ret > 0) {
+        bh_result->b_size = (ret << inode->i_blkbits);
+        ret = 0;
+    }
+
+    return ret;
+}
+
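write_begin's get_block callback never allocates: a lookup that finds nothing reserves space by handing back a buffer mapped to block 0 and flagged new + delayed. A compact model of the three-way result handling — illustrative names and flag values, not the kernel's:

#include <stdio.h>

enum bh_demo_flags { BH_MAPPED = 1, BH_NEW = 2, BH_DELAY = 4 };

struct bh_demo {
    unsigned long blocknr;
    unsigned long size;     /* bytes */
    unsigned flags;
};

/* lookup_blocks < 0: error; == 0: hole; > 0: number of mapped blocks */
static int prep_block(int lookup_blocks, struct bh_demo *bh, unsigned blkbits)
{
    if (lookup_blocks == 0) {
        /* hole: reserve, don't allocate - mark delayed at block 0 */
        bh->blocknr = 0;
        bh->flags = BH_MAPPED | BH_NEW | BH_DELAY;
        return 0;
    }
    if (lookup_blocks > 0) {
        bh->size = (unsigned long) lookup_blocks << blkbits;
        return 0;
    }
    return lookup_blocks;   /* propagate the error */
}

int main(void)
{
    struct bh_demo bh = { .size = 4096 };

    prep_block(0, &bh, 12);
    printf("hole -> flags 0x%x (delayed, allocation deferred to writeback)\n",
           bh.flags);
    return 0;
}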
+static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
+                                   struct buffer_head *bh_result, int create)
+{
+    int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
+    unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
+    loff_t disksize = EXT4_I(inode)->i_disksize;
+    handle_t *handle = NULL;
+
+    if (create) {
+        handle = ext4_journal_start(inode, needed_blocks);
+        if (IS_ERR(handle)) {
+            ret = PTR_ERR(handle);
+            goto out;
+        }
+    }
+
+    ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
+                               bh_result, create, 0);
+    if (ret > 0) {
+        bh_result->b_size = (ret << inode->i_blkbits);
+
+        /*
+         * Update on-disk size along with block allocation
+         * we don't use 'extend_disksize' as size may change
+         * within already allocated block -bzzz
+         */
+        disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
+        if (disksize > i_size_read(inode))
+            disksize = i_size_read(inode);
+        if (disksize > EXT4_I(inode)->i_disksize) {
+            /*
+             * XXX: replace with spinlock if seen contended -bzzz
+             */
+            down_write(&EXT4_I(inode)->i_data_sem);
+            if (disksize > EXT4_I(inode)->i_disksize)
+                EXT4_I(inode)->i_disksize = disksize;
+            up_write(&EXT4_I(inode)->i_data_sem);
+
+            if (EXT4_I(inode)->i_disksize == disksize) {
+                if (handle == NULL)
+                    handle = ext4_journal_start(inode, 1);
+                if (!IS_ERR(handle))
+                    ext4_mark_inode_dirty(handle, inode);
+            }
+        }
+
+        ret = 0;
+    }
+
+out:
+    if (handle && !IS_ERR(handle))
+        ext4_journal_stop(handle);
+
+    return ret;
+}
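The i_disksize update above uses a check / lock / re-check pattern so the common case stays lock-free. The same idiom in portable user-space form — a pthread rwlock standing in for i_data_sem; all names illustrative:

#include <pthread.h>
#include <stdio.h>

static long long i_disksize;    /* stand-in for EXT4_I(inode)->i_disksize */
static pthread_rwlock_t i_data_sem = PTHREAD_RWLOCK_INITIALIZER;

static void update_disksize(long long disksize)
{
    if (disksize <= i_disksize)     /* unlocked fast-path check */
        return;
    pthread_rwlock_wrlock(&i_data_sem);
    if (disksize > i_disksize)      /* re-check: another writer may have won */
        i_disksize = disksize;
    pthread_rwlock_unlock(&i_data_sem);
}

int main(void)
{
    update_disksize(4096);
    update_disksize(1024);          /* stale update, correctly ignored */
    printf("i_disksize = %lld\n", i_disksize);
    return 0;
}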
+/* FIXME!! only support data=writeback mode */
+static int ext4_da_writepage(struct page *page,
+                             struct writeback_control *wbc)
+{
+    struct inode *inode = page->mapping->host;
+    handle_t *handle = NULL;
+    int ret = 0;
+    int err;
+
+    if (ext4_journal_current_handle())
+        goto out_fail;
+
+    handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+    if (IS_ERR(handle)) {
+        ret = PTR_ERR(handle);
+        goto out_fail;
+    }
+
+    if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
+        ret = nobh_writepage(page, ext4_get_block, wbc);
+    else
+        ret = block_write_full_page(page, ext4_get_block, wbc);
+
+    if (!ret && inode->i_size > EXT4_I(inode)->i_disksize) {
+        EXT4_I(inode)->i_disksize = inode->i_size;
+        ext4_mark_inode_dirty(handle, inode);
+    }
+
+    err = ext4_journal_stop(handle);
+    if (!ret)
+        ret = err;
+    return ret;
+
+out_fail:
+    redirty_page_for_writepage(wbc, page);
+    unlock_page(page);
+    return ret;
+}
+
+static int ext4_da_writepages(struct address_space *mapping,
+                              struct writeback_control *wbc)
+{
+    return mpage_da_writepages(mapping, wbc, ext4_da_get_block_write);
+}
+
+static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned flags,
+                               struct page **pagep, void **fsdata)
+{
+    int ret;
+    struct page *page;
+    pgoff_t index;
+    unsigned from, to;
+    struct inode *inode = mapping->host;
+    handle_t *handle;
+
+    index = pos >> PAGE_CACHE_SHIFT;
+    from = pos & (PAGE_CACHE_SIZE - 1);
+    to = from + len;
+
+    /*
+     * With delayed allocation, we don't log the i_disksize update
+     * if there is delayed block allocation. But we still need
+     * to journal the i_disksize update if a write to the end of
+     * the file hits an already mapped buffer.
+     */
+    handle = ext4_journal_start(inode, 1);
+    if (IS_ERR(handle)) {
+        ret = PTR_ERR(handle);
+        goto out;
+    }
+
+    page = __grab_cache_page(mapping, index);
+    if (!page)
+        return -ENOMEM;
+    *pagep = page;
+
+    ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+                            ext4_da_get_block_prep);
+    if (ret < 0) {
+        unlock_page(page);
+        ext4_journal_stop(handle);
+        page_cache_release(page);
+    }
+
+out:
+    return ret;
+}
+
+static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+{
+    return !buffer_mapped(bh) || buffer_delay(bh);
+}
+
+static int ext4_da_write_end(struct file *file,
+                             struct address_space *mapping,
+                             loff_t pos, unsigned len, unsigned copied,
+                             struct page *page, void *fsdata)
+{
+    struct inode *inode = mapping->host;
+    int ret = 0, ret2;
+    handle_t *handle = ext4_journal_current_handle();
+    loff_t new_i_size;
+
+    /*
+     * generic_write_end() will run mark_inode_dirty() if i_size
+     * changes. So let's piggyback the i_disksize mark_inode_dirty
+     * into that.
+     */
+
+    new_i_size = pos + copied;
+    if (new_i_size > EXT4_I(inode)->i_disksize)
+        if (!walk_page_buffers(NULL, page_buffers(page),
+                               0, len, NULL, ext4_bh_unmapped_or_delay)) {
+            /*
+             * Updating i_disksize when extending file without
+             * needing block allocation
+             */
+            if (ext4_should_order_data(inode))
+                ret = ext4_jbd2_file_inode(handle, inode);
+
+            EXT4_I(inode)->i_disksize = new_i_size;
+        }
+    ret2 = generic_write_end(file, mapping, pos, len, copied,
+                             page, fsdata);
+    copied = ret2;
+    if (ret2 < 0)
+        ret = ret2;
+    ret2 = ext4_journal_stop(handle);
+    if (!ret)
+        ret = ret2;
+
+    return ret ? ret : copied;
+}
+
+static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
+{
+    struct buffer_head *head, *bh;
+    unsigned int curr_off = 0;
+
+    /*
+     * Drop reserved blocks
+     */
+    BUG_ON(!PageLocked(page));
+    if (!page_has_buffers(page))
+        goto out;
+
+    head = page_buffers(page);
+    bh = head;
+    do {
+        unsigned int next_off = curr_off + bh->b_size;
+
+        /*
+         * is this block fully invalidated?
+         */
+        if (offset <= curr_off && buffer_delay(bh)) {
+            clear_buffer_delay(bh);
+            /* XXX: add real stuff here */
+        }
+        curr_off = next_off;
+        bh = bh->b_this_page;
+    } while (bh != head);
+
+out:
+    ext4_invalidatepage(page, offset);
+
+    return;
+}
+
+
 /*
  * bmap() is special. It gets used by applications such as lilo and by
  * the swapper to find the on-disk block of a specific piece of data.
@@ -1427,6 +2093,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
     journal_t *journal;
     int err;
 
+    if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+            test_opt(inode->i_sb, DELALLOC)) {
+        /*
+         * With delalloc we want to sync the file
+         * so that we can make sure we allocate
+         * blocks for file
+         */
+        filemap_write_and_wait(mapping);
+    }
+
     if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
         /*
          * This is a REALLY heavyweight approach, but the use of
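This hunk matters to FIBMAP users such as lilo: with delayed allocation, a freshly written block has no on-disk address until writeback runs, so bmap must flush first. A user-space illustration using the real FIBMAP ioctl (which typically requires CAP_SYS_RAWIO; the file path is hypothetical):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>   /* FIBMAP */

int main(void)
{
    int fd = open("/mnt/testfile", O_RDONLY);   /* hypothetical path */
    if (fd < 0)
        return 1;

    int block = 0;  /* logical block 0; FIBMAP overwrites it with the physical block */
    if (ioctl(fd, FIBMAP, &block) == 0)
        printf("logical 0 -> physical %d\n", block);
    /* With delalloc, a result of 0 would mean "not allocated yet" -
     * which is why ext4_bmap() now calls filemap_write_and_wait() first. */
    close(fd);
    return 0;
}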
@@ -1471,11 +2147,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
     return 0;
 }
 
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
-{
-    return !buffer_mapped(bh) || buffer_delay(bh);
-}
-
 /*
  * Note that we don't need to start a transaction unless we're journaling data
  * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -1832,10 +2503,28 @@ static const struct address_space_operations ext4_journalled_aops = {
     .releasepage    = ext4_releasepage,
 };
 
+static const struct address_space_operations ext4_da_aops = {
+    .readpage       = ext4_readpage,
+    .readpages      = ext4_readpages,
+    .writepage      = ext4_da_writepage,
+    .writepages     = ext4_da_writepages,
+    .sync_page      = block_sync_page,
+    .write_begin    = ext4_da_write_begin,
+    .write_end      = ext4_da_write_end,
+    .bmap           = ext4_bmap,
+    .invalidatepage = ext4_da_invalidatepage,
+    .releasepage    = ext4_releasepage,
+    .direct_IO      = ext4_direct_IO,
+    .migratepage    = buffer_migrate_page,
+};
+
 void ext4_set_aops(struct inode *inode)
 {
     if (ext4_should_order_data(inode))
         inode->i_mapping->a_ops = &ext4_ordered_aops;
+    else if (ext4_should_writeback_data(inode) &&
+             test_opt(inode->i_sb, DELALLOC))
+        inode->i_mapping->a_ops = &ext4_da_aops;
     else if (ext4_should_writeback_data(inode))
         inode->i_mapping->a_ops = &ext4_writeback_aops;
     else
@@ -898,7 +898,7 @@ enum {
     Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
     Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
     Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-    Opt_mballoc, Opt_nomballoc, Opt_stripe,
+    Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc,
 };
 
 static match_table_t tokens = {
@@ -957,6 +957,7 @@ static match_table_t tokens = {
     {Opt_nomballoc, "nomballoc"},
     {Opt_stripe, "stripe=%u"},
     {Opt_resize, "resize"},
+    {Opt_delalloc, "delalloc"},
     {Opt_err, NULL},
 };
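The new token plugs into the standard mount-option parser: match the string, then set the corresponding bit in s_mount_opt. A stand-alone sketch of that dispatch, simplified to strcmp instead of the kernel's match_token() (the mask value mirrors the EXT4_MOUNT_DELALLOC define; everything else is illustrative):

#include <stdio.h>
#include <string.h>

enum { Opt_delalloc, Opt_stripe, Opt_err };     /* trimmed-down token set */

/* Simplified stand-in for match_token(); the real parser also extracts
 * %u arguments such as the one in "stripe=%u". */
static int lookup_token(const char *opt)
{
    if (!strcmp(opt, "delalloc"))
        return Opt_delalloc;
    if (!strncmp(opt, "stripe=", 7))
        return Opt_stripe;
    return Opt_err;
}

int main(void)
{
    unsigned long s_mount_opt = 0;
    const unsigned long MOUNT_DELALLOC = 0x8000000;

    switch (lookup_token("delalloc")) {
    case Opt_delalloc:
        s_mount_opt |= MOUNT_DELALLOC;  /* what set_opt(..., DELALLOC) does */
        break;
    default:
        printf("unrecognized option\n");
    }
    printf("mount flags: 0x%lx\n", s_mount_opt);
    return 0;
}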
@@ -1335,6 +1336,9 @@ set_qf_format:
                 return 0;
             sbi->s_stripe = option;
             break;
+        case Opt_delalloc:
+            set_opt(sbi->s_mount_opt, DELALLOC);
+            break;
         default:
             printk (KERN_ERR
                 "EXT4-fs: Unrecognized mount option \"%s\" "