2012-11-29 12:28:09 +08:00
|
|
|
/*
|
2012-11-02 16:09:44 +08:00
|
|
|
* fs/f2fs/file.c
|
|
|
|
*
|
|
|
|
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
|
|
|
|
* http://www.samsung.com/
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/f2fs_fs.h>
|
|
|
|
#include <linux/stat.h>
|
|
|
|
#include <linux/buffer_head.h>
|
|
|
|
#include <linux/writeback.h>
|
2013-03-16 10:13:04 +08:00
|
|
|
#include <linux/blkdev.h>
|
2012-11-02 16:09:44 +08:00
|
|
|
#include <linux/falloc.h>
|
|
|
|
#include <linux/types.h>
|
2013-02-04 22:41:41 +08:00
|
|
|
#include <linux/compat.h>
|
2012-11-02 16:09:44 +08:00
|
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <linux/mount.h>
|
2014-04-28 17:12:36 +08:00
|
|
|
#include <linux/pagevec.h>
|
2012-11-02 16:09:44 +08:00
|
|
|
|
|
|
|
#include "f2fs.h"
|
|
|
|
#include "node.h"
|
|
|
|
#include "segment.h"
|
|
|
|
#include "xattr.h"
|
|
|
|
#include "acl.h"
|
2013-04-20 00:28:40 +08:00
|
|
|
#include <trace/events/f2fs.h>
|
2012-11-02 16:09:44 +08:00
|
|
|
|
|
|
|
static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
|
|
|
|
struct vm_fault *vmf)
|
|
|
|
{
|
|
|
|
struct page *page = vmf->page;
|
2013-02-28 05:59:05 +08:00
|
|
|
struct inode *inode = file_inode(vma->vm_file);
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2012-11-02 16:09:44 +08:00
|
|
|
struct dnode_of_data dn;
|
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 18:08:30 +08:00
|
|
|
int err;
|
2012-11-02 16:09:44 +08:00
|
|
|
|
|
|
|
f2fs_balance_fs(sbi);
|
|
|
|
|
|
|
|
sb_start_pagefault(inode->i_sb);
|
|
|
|
|
2014-08-08 07:32:25 +08:00
|
|
|
/* force to convert with normal data indices */
|
|
|
|
err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
|
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
|
2012-11-02 16:09:44 +08:00
|
|
|
/* block allocation */
|
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 18:08:30 +08:00
|
|
|
f2fs_lock_op(sbi);
|
2012-11-02 16:09:44 +08:00
|
|
|
set_new_dnode(&dn, inode, NULL, NULL, 0);
|
2013-11-10 23:13:18 +08:00
|
|
|
err = f2fs_reserve_block(&dn, page->index);
|
f2fs: use rw_sem instead of fs_lock(locks mutex)
The fs_locks is used to block other ops(ex, recovery) when doing checkpoint.
And each other operate routine(besides checkpoint) needs to acquire a fs_lock,
there is a terrible problem here, if these are too many concurrency threads acquiring
fs_lock, so that they will block each other and may lead to some performance problem,
but this is not the phenomenon we want to see.
Though there are some optimization patches introduced to enhance the usage of fs_lock,
but the thorough solution is using a *rw_sem* to replace the fs_lock.
Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block
other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other,
this can avoid the problem described above completely.
Because of the weakness of rw_sem, the above change may introduce a potential problem
that the checkpoint thread might get starved if other threads are intensively locking
the read semaphore for I/O.(Pointed out by Xu Jin)
In order to avoid this, a wait_list is introduced, the appending read semaphore ops
will be dropped into the wait_list if checkpoint thread is waiting for write semaphore,
and will be waked up when checkpoint thread gives up write semaphore.
Thanks to Kim's previous review and test, and will be very glad to see other guys'
performance tests about this patch.
V2:
-fix the potential starvation problem.
-use more suitable func name suggested by Xu Jin.
Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: adjust minor coding standard]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-09-27 18:08:30 +08:00
|
|
|
f2fs_unlock_op(sbi);
|
2013-11-10 23:13:18 +08:00
|
|
|
if (err)
|
|
|
|
goto out;
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2013-04-28 08:04:18 +08:00
|
|
|
file_update_time(vma->vm_file);
|
2012-11-02 16:09:44 +08:00
|
|
|
lock_page(page);
|
2013-12-06 14:00:58 +08:00
|
|
|
if (unlikely(page->mapping != inode->i_mapping ||
|
2013-04-28 08:04:18 +08:00
|
|
|
page_offset(page) > i_size_read(inode) ||
|
2013-12-06 14:00:58 +08:00
|
|
|
!PageUptodate(page))) {
|
2012-11-02 16:09:44 +08:00
|
|
|
unlock_page(page);
|
|
|
|
err = -EFAULT;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* check to see if the page is mapped already (no holes)
|
|
|
|
*/
|
|
|
|
if (PageMappedToDisk(page))
|
2013-04-28 08:04:18 +08:00
|
|
|
goto mapped;
|
2012-11-02 16:09:44 +08:00
|
|
|
|
|
|
|
/* page is wholly or partially inside EOF */
|
|
|
|
if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) {
|
|
|
|
unsigned offset;
|
|
|
|
offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
|
|
|
|
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
|
|
|
|
}
|
|
|
|
set_page_dirty(page);
|
|
|
|
SetPageUptodate(page);
|
|
|
|
|
2013-10-25 13:26:31 +08:00
|
|
|
trace_f2fs_vm_page_mkwrite(page, DATA);
|
2013-04-28 08:04:18 +08:00
|
|
|
mapped:
|
|
|
|
/* fill the page */
|
2014-03-18 12:29:07 +08:00
|
|
|
f2fs_wait_on_page_writeback(page, DATA);
|
2012-11-02 16:09:44 +08:00
|
|
|
out:
|
|
|
|
sb_end_pagefault(inode->i_sb);
|
|
|
|
return block_page_mkwrite_return(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct vm_operations_struct f2fs_file_vm_ops = {
|
2013-01-17 17:37:41 +08:00
|
|
|
.fault = filemap_fault,
|
2014-04-08 06:37:19 +08:00
|
|
|
.map_pages = filemap_map_pages,
|
2013-01-17 17:37:41 +08:00
|
|
|
.page_mkwrite = f2fs_vm_page_mkwrite,
|
|
|
|
.remap_pages = generic_file_remap_pages,
|
2012-11-02 16:09:44 +08:00
|
|
|
};
|
|
|
|
|
2013-06-14 07:52:35 +08:00
|
|
|
/*
 * get_parent_ino - find the parent inode number through any dentry alias
 * @inode: inode whose parent is wanted
 * @pino: receives parent_ino() of the alias on success
 *
 * Returns 1 when *pino has been set.  Returns 0 when the inode cannot be
 * grabbed, when it has no dentry alias, or when update_dent_inode() returns
 * non-zero; *pino is left untouched in those cases.
 */
static int get_parent_ino(struct inode *inode, nid_t *pino)
{
	struct inode *held;
	struct dentry *dentry;

	/*
	 * igrab() can return NULL (inode on its way out); do not hand that
	 * to d_find_any_alias().
	 */
	held = igrab(inode);
	if (!held)
		return 0;

	dentry = d_find_any_alias(held);
	iput(held);
	if (!dentry)
		return 0;

	if (update_dent_inode(inode, &dentry->d_name)) {
		dput(dentry);
		return 0;
	}

	*pino = parent_ino(dentry);
	dput(dentry);
	return 1;
}
|
|
|
|
|
2014-08-20 18:37:35 +08:00
|
|
|
static inline bool need_do_checkpoint(struct inode *inode)
|
|
|
|
{
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2014-08-20 18:37:35 +08:00
|
|
|
bool need_cp = false;
|
|
|
|
|
|
|
|
if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
|
|
|
|
need_cp = true;
|
|
|
|
else if (file_wrong_pino(inode))
|
|
|
|
need_cp = true;
|
|
|
|
else if (!space_for_roll_forward(sbi))
|
|
|
|
need_cp = true;
|
|
|
|
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
|
|
|
|
need_cp = true;
|
|
|
|
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
|
|
|
|
need_cp = true;
|
|
|
|
|
|
|
|
return need_cp;
|
|
|
|
}
|
|
|
|
|
2012-11-02 16:09:44 +08:00
|
|
|
/*
 * f2fs_sync_file - fsync()/fdatasync() implementation for f2fs files
 * @file: file being synced
 * @start: start of the byte range passed to filemap_write_and_wait_range()
 * @end: end of that byte range
 * @datasync: only forwarded to the exit tracepoint in this function
 *
 * Writes back the dirty data range, then either runs a checkpoint (when
 * need_do_checkpoint() asks for one) or writes this inode's node pages,
 * waits for them and issues a flush.
 *
 * Returns 0 right away on a read-only filesystem, otherwise the status left
 * by the last step that ran.
 */
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	int ret = 0;
	bool need_cp = false;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};

	if (unlikely(f2fs_readonly(inode->i_sb)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	/* with few dirty pages, ask the data writeback for in-place updates */
	if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(fi, FI_NEED_IPU);
	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	clear_inode_flag(fi, FI_NEED_IPU);

	/* need_cp is still false here, exactly what the tracepoint reports */
	if (ret)
		goto out;

	/*
	 * No appended data since the last sync: skip writing recovery info
	 * unless the inode itself has pending updates.
	 */
	if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
			!exist_written_data(sbi, ino, APPEND_INO)) {
		struct page *ipage = find_get_page(NODE_MAPPING(sbi), ino);
		bool inode_updated;

		inode_updated = (ipage && PageDirty(ipage)) ||
				need_inode_block_update(sbi, ino);
		f2fs_put_page(ipage, 0);

		if (!inode_updated) {
			if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
					exist_written_data(sbi, ino, UPDATE_INO))
				goto flush_out;
			goto out;
		}
	}

	/* guarantee free sections for fsync */
	f2fs_balance_fs(sbi);

	/*
	 * Both of fdatasync() and fsync() are able to be recovered from
	 * sudden-power-off.
	 */
	down_read(&fi->i_sem);
	need_cp = need_do_checkpoint(inode);
	up_read(&fi->i_sem);

	if (need_cp) {
		nid_t pino;
		bool got_pino;

		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		down_write(&fi->i_sem);
		fi->xattr_ver = 0;
		got_pino = file_wrong_pino(inode) && inode->i_nlink == 1 &&
					get_parent_ino(inode, &pino);
		if (got_pino) {
			fi->i_pino = pino;
			file_got_pino(inode);
		}
		up_write(&fi->i_sem);

		/*
		 * NOTE(review): when the parent ino is repaired, the result
		 * of f2fs_sync_fs() above is replaced by the result of
		 * f2fs_write_inode().
		 */
		if (got_pino) {
			mark_inode_dirty_sync(inode);
			ret = f2fs_write_inode(inode, NULL);
		}
		goto out;
	}

	/* keep writing node pages until the inode block needs no update */
	for (;;) {
		sync_node_pages(sbi, ino, &wbc);

		if (!need_inode_block_update(sbi, ino))
			break;

		mark_inode_dirty_sync(inode);
		ret = f2fs_write_inode(inode, NULL);
		if (ret)
			goto out;
	}

	ret = wait_on_node_pages_writeback(sbi, ino);
	if (ret)
		goto out;

	/* once recovery info is written, there is no need to track this */
	remove_dirty_inode(sbi, ino, APPEND_INO);
	clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
	remove_dirty_inode(sbi, ino, UPDATE_INO);
	clear_inode_flag(fi, FI_UPDATE_WRITE);
	ret = f2fs_issue_flush(sbi);
out:
	trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
	return ret;
}
|
|
|
|
|
2014-04-28 17:12:36 +08:00
|
|
|
/*
 * __get_first_dirty_index - index of the first dirty page at or after @pgofs
 * @mapping: page cache to search
 * @pgofs: page index to start the lookup from
 * @whence: SEEK_DATA or SEEK_HOLE
 *
 * Returns 0 without searching unless @whence is SEEK_DATA.  For SEEK_DATA,
 * returns the index of the first page tagged dirty, or LONG_MAX when the
 * lookup finds none.
 */
static pgoff_t __get_first_dirty_index(struct address_space *mapping,
						pgoff_t pgofs, int whence)
{
	struct pagevec pvec;
	pgoff_t first_dirty = LONG_MAX;

	if (whence != SEEK_DATA)
		return 0;

	/* a single-entry lookup is enough: only the first hit matters */
	pagevec_init(&pvec, 0);
	if (pagevec_lookup_tag(&pvec, mapping, &pgofs,
					PAGECACHE_TAG_DIRTY, 1))
		first_dirty = pvec.pages[0]->index;
	pagevec_release(&pvec);

	return first_dirty;
}
|
|
|
|
|
|
|
|
/*
 * __found_offset - does the block at @pgofs satisfy the seek request?
 * @blkaddr: block address stored for the page
 * @dirty: first dirty page index as computed by the caller
 * @pgofs: page index being examined
 * @whence: SEEK_DATA or SEEK_HOLE
 *
 * SEEK_HOLE matches a NULL_ADDR block.  SEEK_DATA matches any block that is
 * neither NULL_ADDR nor NEW_ADDR, and a NEW_ADDR block only when @pgofs is
 * the first dirty page.  Any other @whence never matches.
 */
static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
							int whence)
{
	if (whence == SEEK_HOLE)
		return blkaddr == NULL_ADDR;

	if (whence != SEEK_DATA)
		return false;

	/* NEW_ADDR counts as data only when it is the first dirty page */
	if (blkaddr == NEW_ADDR)
		return dirty == pgofs;

	return blkaddr != NULL_ADDR;
}
|
|
|
|
|
2014-04-23 14:10:24 +08:00
|
|
|
/*
 * f2fs_seek_block - SEEK_DATA / SEEK_HOLE implementation
 * @file: file to seek in
 * @offset: byte offset to start searching from
 * @whence: SEEK_DATA or SEEK_HOLE
 *
 * Walks the dnode blocks from @offset under i_mutex and stops at the first
 * block address accepted by __found_offset().  For inline-data files,
 * SEEK_DATA stays at @offset and SEEK_HOLE reports i_size.
 *
 * Returns the result of vfs_setpos() on a match, or -ENXIO when @offset is
 * at or beyond i_size, when a dnode lookup fails with an error other than
 * -ENOENT, or when SEEK_DATA finds nothing before i_size.
 */
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = inode->i_sb->s_maxbytes;
	struct dnode_of_data dn;
	pgoff_t pgofs, end_offset, dirty;
	loff_t data_ofs = offset;
	loff_t isize;
	int err = 0;

	mutex_lock(&inode->i_mutex);

	isize = i_size_read(inode);
	if (offset >= isize)
		goto fail;

	/* handle inline data case */
	if (f2fs_has_inline_data(inode)) {
		if (whence == SEEK_HOLE)
			data_ofs = isize;
		goto found;
	}

	pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);

	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);

	/*
	 * pgofs is a pgoff_t; cast to loff_t before shifting so the byte
	 * offset does not wrap on 32-bit for positions of 4GiB and above.
	 */
	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
		if (err && err != -ENOENT) {
			goto fail;
		} else if (err == -ENOENT) {
			/* direct node does not exist */
			if (whence == SEEK_DATA) {
				pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
							F2FS_I(inode));
				continue;
			} else {
				goto found;
			}
		}

		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

		/* find data/hole in dnode block */
		for (; dn.ofs_in_node < end_offset;
				dn.ofs_in_node++, pgofs++,
				data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
			block_t blkaddr;
			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

			if (__found_offset(blkaddr, dirty, pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}
		}
		f2fs_put_dnode(&dn);
	}

	if (whence == SEEK_DATA)
		goto fail;
found:
	/* a hole position never lies beyond i_size */
	if (whence == SEEK_HOLE && data_ofs > isize)
		data_ofs = isize;
	mutex_unlock(&inode->i_mutex);
	return vfs_setpos(file, data_ofs, maxbytes);
fail:
	mutex_unlock(&inode->i_mutex);
	return -ENXIO;
}
|
|
|
|
|
|
|
|
/*
 * f2fs_llseek - ->llseek for f2fs regular files
 * @file: file to seek in
 * @offset: requested offset
 * @whence: SEEK_SET, SEEK_CUR, SEEK_END, SEEK_DATA or SEEK_HOLE
 *
 * SEEK_SET/CUR/END are handled by generic_file_llseek_size().  SEEK_DATA and
 * SEEK_HOLE go to f2fs_seek_block() after rejecting negative offsets with
 * -ENXIO.  Any other @whence yields -EINVAL.
 */
static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = inode->i_sb->s_maxbytes;

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		if (offset < 0)
			return -ENXIO;
		return f2fs_seek_block(file, offset, whence);
	}

	if (whence == SEEK_SET || whence == SEEK_CUR || whence == SEEK_END)
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));

	return -EINVAL;
}
|
|
|
|
|
2012-11-02 16:09:44 +08:00
|
|
|
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
file_accessed(file);
|
|
|
|
vma->vm_ops = &f2fs_file_vm_ops;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-05-22 07:02:02 +08:00
|
|
|
int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
|
2012-11-02 16:09:44 +08:00
|
|
|
{
|
|
|
|
int nr_free = 0, ofs = dn->ofs_in_node;
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
|
2012-11-02 16:09:44 +08:00
|
|
|
struct f2fs_node *raw_node;
|
|
|
|
__le32 *addr;
|
|
|
|
|
2013-07-15 17:57:38 +08:00
|
|
|
raw_node = F2FS_NODE(dn->node_page);
|
2012-11-02 16:09:44 +08:00
|
|
|
addr = blkaddr_in_node(raw_node) + ofs;
|
|
|
|
|
2014-01-18 04:44:39 +08:00
|
|
|
for (; count > 0; count--, addr++, dn->ofs_in_node++) {
|
2012-11-02 16:09:44 +08:00
|
|
|
block_t blkaddr = le32_to_cpu(*addr);
|
|
|
|
if (blkaddr == NULL_ADDR)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
update_extent_cache(NULL_ADDR, dn);
|
|
|
|
invalidate_blocks(sbi, blkaddr);
|
|
|
|
nr_free++;
|
|
|
|
}
|
|
|
|
if (nr_free) {
|
2013-06-08 20:25:40 +08:00
|
|
|
dec_valid_block_count(sbi, dn->inode, nr_free);
|
2012-11-02 16:09:44 +08:00
|
|
|
set_page_dirty(dn->node_page);
|
|
|
|
sync_inode_page(dn);
|
|
|
|
}
|
|
|
|
dn->ofs_in_node = ofs;
|
2013-04-20 00:28:52 +08:00
|
|
|
|
|
|
|
trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
|
|
|
|
dn->ofs_in_node, nr_free);
|
2012-11-02 16:09:44 +08:00
|
|
|
return nr_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
void truncate_data_blocks(struct dnode_of_data *dn)
|
|
|
|
{
|
|
|
|
truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * truncate_partial_data_page - zero the tail of the page containing @from
 * @inode: inode being truncated
 * @from: new end offset in bytes
 *
 * Inline-data inodes are handed to truncate_inline_data().  Otherwise, when
 * @from is not page aligned and the page is found, everything from @from to
 * the end of that page is zeroed and the page is marked dirty.  Nothing is
 * done when the page lookup fails, or when the page is not uptodate or no
 * longer belongs to this inode's mapping.
 */
static void truncate_partial_data_page(struct inode *inode, u64 from)
{
	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
	struct page *page;

	if (f2fs_has_inline_data(inode)) {
		truncate_inline_data(inode, from);
		return;
	}

	/* page aligned: there is no partial page to clean up */
	if (!offset)
		return;

	page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
	if (IS_ERR(page))
		return;

	lock_page(page);
	if (likely(PageUptodate(page) &&
			page->mapping == inode->i_mapping)) {
		f2fs_wait_on_page_writeback(page, DATA);
		zero_user(page, offset, PAGE_CACHE_SIZE - offset);
		set_page_dirty(page);
	}

	/* second argument 1: the page is locked at this point */
	f2fs_put_page(page, 1);
}
|
|
|
|
|
2014-08-15 07:32:54 +08:00
|
|
|
int truncate_blocks(struct inode *inode, u64 from, bool lock)
|
2012-11-02 16:09:44 +08:00
|
|
|
{
|
2014-09-03 06:31:18 +08:00
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
2012-11-02 16:09:44 +08:00
|
|
|
unsigned int blocksize = inode->i_sb->s_blocksize;
|
|
|
|
struct dnode_of_data dn;
|
|
|
|
pgoff_t free_from;
|
f2fs: handle inline data operations
Hook inline data read/write, truncate, fallocate, setattr, etc.
Files need meet following 2 requirement to inline:
1) file size is not greater than MAX_INLINE_DATA;
2) file doesn't pre-allocate data blocks by fallocate().
FI_INLINE_DATA will not be set while creating a new regular inode because
most of the files are bigger than ~3.4K. Set FI_INLINE_DATA only when
data is submitted to the block layer, rather than setting it while creating
a new inode; this also avoids converting data from inline to normal data
block and vice versa.
While writing inline data to the inode block, the first data block should be
released if the file has a block indexed by i_addr[0].
On the other hand, when a file operation is applied to a file with inline
data, we need to test whether this file can remain inline after the
operation; otherwise it should be converted into a normal file by reserving
a new data block, copying the inline data to this new block and clearing the
FI_INLINE_DATA flag. Because reserving a new data block here will make use
of i_addr[0], if we saved inline data in i_addr[0..872], then the first
4 bytes would be overwritten. This problem can be avoided simply by
not using i_addr[0] for inline data.
Signed-off-by: Huajun Li <huajun.li@intel.com>
Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
Signed-off-by: Weihong Xu <weihong.xu@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-11-10 23:13:20 +08:00
|
|
|
int count = 0, err = 0;
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2013-04-20 00:28:52 +08:00
|
|
|
trace_f2fs_truncate_blocks_enter(inode, from);
|
|
|
|
|
2013-12-27 10:01:54 +08:00
|
|
|
if (f2fs_has_inline_data(inode))
|
|
|
|
goto done;
|
|
|
|
|
2012-11-02 16:09:44 +08:00
|
|
|
free_from = (pgoff_t)
|
|
|
|
((from + blocksize - 1) >> (sbi->log_blocksize));
|
|
|
|
|
2014-08-15 07:32:54 +08:00
|
|
|
if (lock)
|
|
|
|
f2fs_lock_op(sbi);
|
f2fs: handle inline data operations
Hook inline data read/write, truncate, fallocate, setattr, etc.
Files need meet following 2 requirement to inline:
1) file size is not greater than MAX_INLINE_DATA;
2) file doesn't pre-allocate data blocks by fallocate().
FI_INLINE_DATA will not be set while creating a new regular inode because
most of the files are bigger than ~3.4K. Set FI_INLINE_DATA only when
data is submitted to block layer, ranther than set it while creating a new
inode, this also avoids converting data from inline to normal data block
and vice versa.
While writting inline data to inode block, the first data block should be
released if the file has a block indexed by i_addr[0].
On the other hand, when a file operation is appied to a file with inline
data, we need to test if this file can remain inline by doing this
operation, otherwise it should be convert into normal file by reserving
a new data block, copying inline data to this new block and clear
FI_INLINE_DATA flag. Because reserve a new data block here will make use
of i_addr[0], if we save inline data in i_addr[0..872], then the first
4 bytes would be overwriten. This problem can be avoided simply by
not using i_addr[0] for inline data.
Signed-off-by: Huajun Li <huajun.li@intel.com>
Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
Signed-off-by: Weihong Xu <weihong.xu@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-11-10 23:13:20 +08:00
|
|
|
|
2012-11-02 16:09:44 +08:00
|
|
|
set_new_dnode(&dn, inode, NULL, NULL, 0);
|
2013-02-26 12:10:46 +08:00
|
|
|
err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
|
2012-11-02 16:09:44 +08:00
|
|
|
if (err) {
|
|
|
|
if (err == -ENOENT)
|
|
|
|
goto free_next;
|
2014-08-15 07:32:54 +08:00
|
|
|
if (lock)
|
|
|
|
f2fs_unlock_op(sbi);
|
2013-04-20 00:28:52 +08:00
|
|
|
trace_f2fs_truncate_blocks_exit(inode, err);
|
2012-11-02 16:09:44 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2014-04-26 19:59:52 +08:00
|
|
|
count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
|
2012-11-02 16:09:44 +08:00
|
|
|
|
|
|
|
count -= dn.ofs_in_node;
|
2014-09-03 06:52:58 +08:00
|
|
|
f2fs_bug_on(sbi, count < 0);
|
f2fs: introduce a new global lock scheme
In the previous version, f2fs uses global locks according to the usage types,
such as directory operations, block allocation, block write, and so on.
Reference the following lock types in f2fs.h.
enum lock_type {
RENAME, /* for renaming operations */
DENTRY_OPS, /* for directory operations */
DATA_WRITE, /* for data write */
DATA_NEW, /* for data allocation */
DATA_TRUNC, /* for data truncate */
NODE_NEW, /* for node allocation */
NODE_TRUNC, /* for node truncate */
NODE_WRITE, /* for node write */
NR_LOCK_TYPE,
};
In that case, we lose the performance under the multi-threading environment,
since every types of operations must be conducted one at a time.
In order to address the problem, let's share the locks globally with a mutex
array regardless of any types.
So, let users grab a mutex and perform their jobs in parallel as much as
possbile.
For this, I propose a new global lock scheme as follows.
0. Data structure
- f2fs_sb_info -> mutex_lock[NR_GLOBAL_LOCKS]
- f2fs_sb_info -> node_write
1. mutex_lock_op(sbi)
- try to get an avaiable lock from the array.
- returns the index of the gottern lock variable.
2. mutex_unlock_op(sbi, index of the lock)
- unlock the given index of the lock.
3. mutex_lock_all(sbi)
- grab all the locks in the array before the checkpoint.
4. mutex_unlock_all(sbi)
- release all the locks in the array after checkpoint.
5. block_operations()
- call mutex_lock_all()
- sync_dirty_dir_inodes()
- grab node_write
- sync_node_pages()
Note that,
the pairs of mutex_lock_op()/mutex_unlock_op() and
mutex_lock_all()/mutex_unlock_all() should be used together.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2012-11-22 15:21:29 +08:00
|
|
|
|
2012-11-02 16:09:44 +08:00
|
|
|
if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
|
|
|
|
truncate_data_blocks_range(&dn, count);
|
|
|
|
free_from += count;
|
|
|
|
}
|
|
|
|
|
|
|
|
f2fs_put_dnode(&dn);
|
|
|
|
free_next:
|
|
|
|
err = truncate_inode_blocks(inode, free_from);
|
2014-08-15 07:32:54 +08:00
|
|
|
if (lock)
|
|
|
|
f2fs_unlock_op(sbi);
|
2013-12-27 10:01:54 +08:00
|
|
|
done:
|
2012-11-02 16:09:44 +08:00
|
|
|
/* lastly zero out the first data page */
|
|
|
|
truncate_partial_data_page(inode, from);
|
|
|
|
|
2013-04-20 00:28:52 +08:00
|
|
|
trace_f2fs_truncate_blocks_exit(inode, err);
|
2012-11-02 16:09:44 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
void f2fs_truncate(struct inode *inode)
|
|
|
|
{
|
|
|
|
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
|
|
|
|
S_ISLNK(inode->i_mode)))
|
|
|
|
return;
|
|
|
|
|
2013-04-20 00:28:52 +08:00
|
|
|
trace_f2fs_truncate(inode);
|
|
|
|
|
2014-08-15 07:32:54 +08:00
|
|
|
if (!truncate_blocks(inode, i_size_read(inode), true)) {
|
2012-11-02 16:09:44 +08:00
|
|
|
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
|
|
|
|
mark_inode_dirty(inode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-07 15:33:07 +08:00
|
|
|
int f2fs_getattr(struct vfsmount *mnt,
|
2012-11-02 16:09:44 +08:00
|
|
|
struct dentry *dentry, struct kstat *stat)
|
|
|
|
{
|
|
|
|
struct inode *inode = dentry->d_inode;
|
|
|
|
generic_fillattr(inode, stat);
|
|
|
|
stat->blocks <<= 3;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_F2FS_FS_POSIX_ACL
/*
 * Copy the attributes selected by attr->ia_valid from @attr into @inode.
 *
 * Timestamps are truncated to the superblock's time granularity.  A mode
 * change is not written to i_mode here; it is handed to set_acl_inode()
 * after S_ISGID has been stripped for callers that are neither in the
 * inode's group nor hold CAP_FSETID.
 */
static void __setattr_copy(struct inode *inode, const struct iattr *attr)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	unsigned int ia_valid = attr->ia_valid;
	unsigned int gran = inode->i_sb->s_time_gran;
	umode_t mode;

	if (ia_valid & ATTR_UID)
		inode->i_uid = attr->ia_uid;
	if (ia_valid & ATTR_GID)
		inode->i_gid = attr->ia_gid;
	if (ia_valid & ATTR_ATIME)
		inode->i_atime = timespec_trunc(attr->ia_atime, gran);
	if (ia_valid & ATTR_MTIME)
		inode->i_mtime = timespec_trunc(attr->ia_mtime, gran);
	if (ia_valid & ATTR_CTIME)
		inode->i_ctime = timespec_trunc(attr->ia_ctime, gran);

	if (!(ia_valid & ATTR_MODE))
		return;

	mode = attr->ia_mode;
	if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
		mode &= ~S_ISGID;
	set_acl_inode(fi, mode);
}
#else
/* without ACL support the generic VFS helper is used directly */
#define __setattr_copy setattr_copy
#endif
|
|
|
|
|
|
|
|
int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
|
|
|
|
{
|
|
|
|
struct inode *inode = dentry->d_inode;
|
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = inode_change_ok(inode, attr);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2014-09-15 18:02:09 +08:00
|
|
|
if (attr->ia_valid & ATTR_SIZE) {
|
2014-08-08 07:32:25 +08:00
|
|
|
err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
|
2013-12-27 11:28:59 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
f2fs: handle inline data operations
Hook inline data read/write, truncate, fallocate, setattr, etc.
Files need meet following 2 requirement to inline:
1) file size is not greater than MAX_INLINE_DATA;
2) file doesn't pre-allocate data blocks by fallocate().
FI_INLINE_DATA will not be set while creating a new regular inode because
most of the files are bigger than ~3.4K. Set FI_INLINE_DATA only when
data is submitted to block layer, ranther than set it while creating a new
inode, this also avoids converting data from inline to normal data block
and vice versa.
While writting inline data to inode block, the first data block should be
released if the file has a block indexed by i_addr[0].
On the other hand, when a file operation is appied to a file with inline
data, we need to test if this file can remain inline by doing this
operation, otherwise it should be convert into normal file by reserving
a new data block, copying inline data to this new block and clear
FI_INLINE_DATA flag. Because reserve a new data block here will make use
of i_addr[0], if we save inline data in i_addr[0..872], then the first
4 bytes would be overwriten. This problem can be avoided simply by
not using i_addr[0] for inline data.
Signed-off-by: Huajun Li <huajun.li@intel.com>
Signed-off-by: Haicheng Li <haicheng.li@linux.intel.com>
Signed-off-by: Weihong Xu <weihong.xu@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
2013-11-10 23:13:20 +08:00
|
|
|
|
2014-09-15 18:02:09 +08:00
|
|
|
if (attr->ia_size != i_size_read(inode)) {
|
|
|
|
truncate_setsize(inode, attr->ia_size);
|
|
|
|
f2fs_truncate(inode);
|
|
|
|
f2fs_balance_fs(F2FS_I_SB(inode));
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* giving a chance to truncate blocks past EOF which
|
|
|
|
* are fallocated with FALLOC_FL_KEEP_SIZE.
|
|
|
|
*/
|
|
|
|
f2fs_truncate(inode);
|
|
|
|
}
|
2012-11-02 16:09:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
__setattr_copy(inode, attr);
|
|
|
|
|
|
|
|
if (attr->ia_valid & ATTR_MODE) {
|
2013-12-20 21:16:45 +08:00
|
|
|
err = posix_acl_chmod(inode, get_inode_mode(inode));
|
2012-11-02 16:09:44 +08:00
|
|
|
if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
|
|
|
|
inode->i_mode = fi->i_acl_mode;
|
|
|
|
clear_inode_flag(fi, FI_ACL_MODE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
mark_inode_dirty(inode);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct inode_operations f2fs_file_inode_operations = {
|
|
|
|
.getattr = f2fs_getattr,
|
|
|
|
.setattr = f2fs_setattr,
|
|
|
|
.get_acl = f2fs_get_acl,
|
2013-12-20 21:16:45 +08:00
|
|
|
.set_acl = f2fs_set_acl,
|
2012-11-02 16:09:44 +08:00
|
|
|
#ifdef CONFIG_F2FS_FS_XATTR
|
|
|
|
.setxattr = generic_setxattr,
|
|
|
|
.getxattr = generic_getxattr,
|
|
|
|
.listxattr = f2fs_listxattr,
|
|
|
|
.removexattr = generic_removexattr,
|
|
|
|
#endif
|
2014-06-08 03:30:14 +08:00
|
|
|
.fiemap = f2fs_fiemap,
|
2012-11-02 16:09:44 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Zero @len bytes starting at byte @start inside page @index of @inode.
 *
 * Does nothing when @len is 0.  The page is obtained with
 * get_new_data_page() under f2fs_lock_op(); if that fails the error is
 * silently dropped and nothing is zeroed.
 */
static void fill_zero(struct inode *inode, pgoff_t index,
					loff_t start, loff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;

	if (len == 0)
		return;

	f2fs_balance_fs(sbi);

	f2fs_lock_op(sbi);
	page = get_new_data_page(inode, NULL, index, false);
	f2fs_unlock_op(sbi);

	if (IS_ERR(page))
		return;

	/* let in-flight writeback finish before changing the contents */
	f2fs_wait_on_page_writeback(page, DATA);
	zero_user(page, start, len);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}
|
|
|
|
|
|
|
|
/*
 * Drop the data block of every page index in [@pg_start, @pg_end).
 *
 * Indices with no dnode (-ENOENT) are skipped.  Any other lookup error
 * stops the walk and is returned; otherwise returns 0.
 */
int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
{
	pgoff_t index;

	for (index = pg_start; index < pg_end; index++) {
		struct dnode_of_data dn;
		int err;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
		if (err == -ENOENT)
			continue;
		if (err)
			return err;

		/* only slots that actually hold a block address are released */
		if (dn.data_blkaddr != NULL_ADDR)
			truncate_data_blocks_range(&dn, 1);
		f2fs_put_dnode(&dn);
	}
	return 0;
}
|
|
|
|
|
2013-11-22 16:52:50 +08:00
|
|
|
/*
 * Punch a hole of @len bytes at byte @offset in regular file @inode.
 *
 * Partial pages at either end of the range are zeroed with fill_zero();
 * whole pages in between are dropped from the page cache and their
 * blocks released through truncate_hole().
 *
 * Returns 0 on success (including when @offset is at or beyond i_size),
 * -EOPNOTSUPP for non-regular files, or the error returned by
 * f2fs_convert_inline_data() / truncate_hole().
 */
static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	pgoff_t pg_start, pg_end;
	loff_t off_start, off_end;
	int ret = 0;

	if (!S_ISREG(inode->i_mode))
		return -EOPNOTSUPP;

	/* skip punching hole beyond i_size */
	if (offset >= inode->i_size)
		return ret;

	/*
	 * NOTE(review): passing MAX_INLINE_DATA + 1 presumably forces any
	 * inline data out into a regular block -- confirm against
	 * f2fs_convert_inline_data().
	 */
	ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
	if (ret)
		return ret;

	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;

	off_start = offset & (PAGE_CACHE_SIZE - 1);
	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);

	if (pg_start == pg_end) {
		/* the whole hole lives inside a single page */
		fill_zero(inode, pg_start, off_start,
						off_end - off_start);
	} else {
		/* zero the unaligned head and step past that page */
		if (off_start)
			fill_zero(inode, pg_start++, off_start,
					PAGE_CACHE_SIZE - off_start);
		/* zero the unaligned tail */
		if (off_end)
			fill_zero(inode, pg_end, 0, off_end);

		if (pg_start < pg_end) {
			struct address_space *mapping = inode->i_mapping;
			loff_t blk_start, blk_end;
			struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

			f2fs_balance_fs(sbi);

			/*
			 * Widen to loff_t before shifting: pgoff_t is
			 * unsigned long, so on 32-bit builds the shift would
			 * otherwise wrap for byte offsets at or above 4GiB.
			 */
			blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
			blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
			truncate_inode_pages_range(mapping, blk_start,
					blk_end - 1);

			f2fs_lock_op(sbi);
			ret = truncate_hole(inode, pg_start, pg_end);
			f2fs_unlock_op(sbi);
		}
	}

	return ret;
}
|
|
|
|
|
|
|
|
/*
 * Reserve blocks for the byte range [@offset, @offset + @len) of @inode.
 *
 * @mode: fallocate mode bits; with FALLOC_FL_KEEP_SIZE set, i_size is
 *        never changed
 *
 * One block is reserved per page index touched by the range.  Unless
 * FALLOC_FL_KEEP_SIZE is given, i_size is raised to the end of the last
 * page successfully processed when that lies beyond the current i_size.
 *
 * Returns 0 on success, or the error from inode_newsize_ok(),
 * f2fs_convert_inline_data() or f2fs_reserve_block().
 */
static int expand_inode_data(struct inode *inode, loff_t offset,
					loff_t len, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	pgoff_t index, pg_start, pg_end;
	loff_t new_size = i_size_read(inode);
	loff_t off_start, off_end;
	int ret = 0;

	f2fs_balance_fs(sbi);

	ret = inode_newsize_ok(inode, (len + offset));
	if (ret)
		return ret;

	ret = f2fs_convert_inline_data(inode, offset + len, NULL);
	if (ret)
		return ret;

	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;

	off_start = offset & (PAGE_CACHE_SIZE - 1);
	off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);

	f2fs_lock_op(sbi);

	for (index = pg_start; index <= pg_end; index++) {
		struct dnode_of_data dn;

		/*
		 * The range ends exactly on a page boundary, so page pg_end
		 * holds no byte of it: nothing to reserve, only the size
		 * bookkeeping below applies.
		 */
		if (index == pg_end && !off_end)
			goto noalloc;

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = f2fs_reserve_block(&dn, index);
		if (ret)
			break;
noalloc:
		/*
		 * Track the absolute end of the range covered so far.  The
		 * value must not be derived from the current i_size, or a
		 * failure part-way through could grow a file whose size
		 * already exceeds the requested range.  Indices are widened
		 * to loff_t before shifting because pgoff_t is only 32 bits
		 * wide on 32-bit builds.
		 */
		if (pg_start == pg_end)
			new_size = offset + len;
		else if (index == pg_end)
			new_size = ((loff_t)index << PAGE_CACHE_SHIFT) +
								off_end;
		else
			new_size = (loff_t)(index + 1) << PAGE_CACHE_SHIFT;
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		i_size_read(inode) < new_size) {
		i_size_write(inode, new_size);
		mark_inode_dirty(inode);
		update_inode_page(inode);
	}
	f2fs_unlock_op(sbi);

	return ret;
}
|
|
|
|
|
|
|
|
/*
 * fallocate entry point: punch a hole or preallocate blocks for @file.
 *
 * Only FALLOC_FL_KEEP_SIZE and FALLOC_FL_PUNCH_HOLE are accepted; any
 * other mode bit yields -EOPNOTSUPP.  The operation runs under the
 * inode's i_mutex, and mtime/ctime are refreshed on success.
 *
 * Returns 0 on success or the error from punch_hole() /
 * expand_inode_data().
 */
static long f2fs_fallocate(struct file *file, int mode,
				loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	long ret;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	mutex_lock(&inode->i_mutex);

	ret = (mode & FALLOC_FL_PUNCH_HOLE) ?
			punch_hole(inode, offset, len) :
			expand_inode_data(inode, offset, len, mode);

	if (ret == 0) {
		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		mark_inode_dirty(inode);
	}

	mutex_unlock(&inode->i_mutex);

	trace_f2fs_fallocate(inode, mode, offset, len, ret);
	return ret;
}
|
|
|
|
|
|
|
|
#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
|
|
|
|
#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
|
|
|
|
|
|
|
|
static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
|
|
|
|
{
|
|
|
|
if (S_ISDIR(mode))
|
|
|
|
return flags;
|
|
|
|
else if (S_ISREG(mode))
|
|
|
|
return flags & F2FS_REG_FLMASK;
|
|
|
|
else
|
|
|
|
return flags & F2FS_OTHER_FLMASK;
|
|
|
|
}
|
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
|
2012-11-02 16:09:44 +08:00
|
|
|
{
|
2013-02-28 05:59:05 +08:00
|
|
|
struct inode *inode = file_inode(filp);
|
2012-11-02 16:09:44 +08:00
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
2014-09-25 06:37:02 +08:00
|
|
|
unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
|
|
|
|
return put_user(flags, (int __user *)arg);
|
|
|
|
}
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_inode_info *fi = F2FS_I(inode);
|
|
|
|
unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
|
|
|
|
unsigned int oldflags;
|
|
|
|
int ret;
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
if (!inode_owner_or_capable(inode)) {
|
|
|
|
ret = -EACCES;
|
|
|
|
goto out;
|
|
|
|
}
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
if (get_user(flags, (int __user *)arg)) {
|
|
|
|
ret = -EFAULT;
|
|
|
|
goto out;
|
|
|
|
}
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
flags = f2fs_mask_flags(inode->i_mode, flags);
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
mutex_lock(&inode->i_mutex);
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
oldflags = fi->i_flags;
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
|
|
|
|
if (!capable(CAP_LINUX_IMMUTABLE)) {
|
|
|
|
mutex_unlock(&inode->i_mutex);
|
|
|
|
ret = -EPERM;
|
|
|
|
goto out;
|
2012-11-02 16:09:44 +08:00
|
|
|
}
|
2014-09-25 06:37:02 +08:00
|
|
|
}
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
flags = flags & FS_FL_USER_MODIFIABLE;
|
|
|
|
flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
|
|
|
|
fi->i_flags = flags;
|
|
|
|
mutex_unlock(&inode->i_mutex);
|
2012-11-02 16:09:44 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
f2fs_set_inode_flags(inode);
|
|
|
|
inode->i_ctime = CURRENT_TIME;
|
|
|
|
mark_inode_dirty(inode);
|
2012-11-02 16:09:44 +08:00
|
|
|
out:
|
2014-09-25 06:37:02 +08:00
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
|
|
|
}
|
2014-09-21 13:06:39 +08:00
|
|
|
|
2014-10-07 08:39:50 +08:00
|
|
|
static int f2fs_ioc_start_atomic_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
f2fs_balance_fs(sbi);
|
|
|
|
|
|
|
|
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
|
|
|
|
|
|
|
|
return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int f2fs_ioc_commit_atomic_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
2014-10-07 07:11:16 +08:00
|
|
|
if (f2fs_is_volatile_file(inode))
|
|
|
|
return 0;
|
|
|
|
|
2014-10-07 08:39:50 +08:00
|
|
|
ret = mnt_want_write_file(filp);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (f2fs_is_atomic_file(inode))
|
|
|
|
commit_inmem_pages(inode, false);
|
|
|
|
|
|
|
|
ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
|
|
|
|
mnt_drop_write_file(filp);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-10-07 07:11:16 +08:00
|
|
|
static int f2fs_ioc_start_volatile_write(struct file *filp)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
|
|
|
|
if (!inode_owner_or_capable(inode))
|
|
|
|
return -EACCES;
|
|
|
|
|
|
|
|
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
|
|
|
|
{
|
|
|
|
struct inode *inode = file_inode(filp);
|
|
|
|
struct super_block *sb = inode->i_sb;
|
|
|
|
struct request_queue *q = bdev_get_queue(sb->s_bdev);
|
|
|
|
struct fstrim_range range;
|
|
|
|
int ret;
|
2014-09-21 13:06:39 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
|
|
return -EPERM;
|
2014-09-21 13:06:39 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
if (!blk_queue_discard(q))
|
|
|
|
return -EOPNOTSUPP;
|
2014-09-21 13:06:39 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
|
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
2014-09-21 13:06:39 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
range.minlen = max((unsigned int)range.minlen,
|
|
|
|
q->limits.discard_granularity);
|
|
|
|
ret = f2fs_trim_fs(F2FS_SB(sb), &range);
|
|
|
|
if (ret < 0)
|
|
|
|
return ret;
|
2014-09-21 13:06:39 +08:00
|
|
|
|
2014-09-25 06:37:02 +08:00
|
|
|
if (copy_to_user((struct fstrim_range __user *)arg, &range,
|
|
|
|
sizeof(range)))
|
|
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
|
|
{
|
|
|
|
switch (cmd) {
|
|
|
|
case F2FS_IOC_GETFLAGS:
|
|
|
|
return f2fs_ioc_getflags(filp, arg);
|
|
|
|
case F2FS_IOC_SETFLAGS:
|
|
|
|
return f2fs_ioc_setflags(filp, arg);
|
2014-10-07 08:39:50 +08:00
|
|
|
case F2FS_IOC_START_ATOMIC_WRITE:
|
|
|
|
return f2fs_ioc_start_atomic_write(filp);
|
|
|
|
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
|
|
|
|
return f2fs_ioc_commit_atomic_write(filp);
|
2014-10-07 07:11:16 +08:00
|
|
|
case F2FS_IOC_START_VOLATILE_WRITE:
|
|
|
|
return f2fs_ioc_start_volatile_write(filp);
|
2014-09-25 06:37:02 +08:00
|
|
|
case FITRIM:
|
|
|
|
return f2fs_ioc_fitrim(filp, arg);
|
2012-11-02 16:09:44 +08:00
|
|
|
default:
|
|
|
|
return -ENOTTY;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-02-04 22:41:41 +08:00
|
|
|
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point, built only when CONFIG_COMPAT is defined.
 *
 * Maps F2FS_IOC32_GETFLAGS / F2FS_IOC32_SETFLAGS onto their native
 * command numbers and forwards to f2fs_ioctl() with @arg converted via
 * compat_ptr().  Every other command returns -ENOIOCTLCMD.
 */
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	unsigned int native_cmd;

	if (cmd == F2FS_IOC32_GETFLAGS)
		native_cmd = F2FS_IOC_GETFLAGS;
	else if (cmd == F2FS_IOC32_SETFLAGS)
		native_cmd = F2FS_IOC_SETFLAGS;
	else
		return -ENOIOCTLCMD;

	return f2fs_ioctl(file, native_cmd, (unsigned long) compat_ptr(arg));
}
#endif
|
|
|
|
|
2012-11-02 16:09:44 +08:00
|
|
|
const struct file_operations f2fs_file_operations = {
|
2014-04-23 14:10:24 +08:00
|
|
|
.llseek = f2fs_llseek,
|
2014-04-03 02:33:16 +08:00
|
|
|
.read = new_sync_read,
|
2014-04-03 15:17:43 +08:00
|
|
|
.write = new_sync_write,
|
2014-04-03 02:33:16 +08:00
|
|
|
.read_iter = generic_file_read_iter,
|
2014-04-03 15:17:43 +08:00
|
|
|
.write_iter = generic_file_write_iter,
|
2012-11-02 16:09:44 +08:00
|
|
|
.open = generic_file_open,
|
|
|
|
.mmap = f2fs_file_mmap,
|
|
|
|
.fsync = f2fs_sync_file,
|
|
|
|
.fallocate = f2fs_fallocate,
|
|
|
|
.unlocked_ioctl = f2fs_ioctl,
|
2013-02-04 22:41:41 +08:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
.compat_ioctl = f2fs_compat_ioctl,
|
|
|
|
#endif
|
2012-11-02 16:09:44 +08:00
|
|
|
.splice_read = generic_file_splice_read,
|
2014-04-05 16:27:08 +08:00
|
|
|
.splice_write = iter_file_splice_write,
|
2012-11-02 16:09:44 +08:00
|
|
|
};
|