Merge tag 'for-f2fs-3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "Major changes are to:
   - add f2fs_io_tracer and F2FS_IOC_GETVERSION
   - fix wrong acl assignment from parent
   - fix accessing wrong data blocks
   - fix wrong condition check for f2fs_sync_fs
   - align start block address for direct_io
   - add and refactor the readahead flows of FS metadata
   - refactor atomic and volatile write policies

  But most of patches are for clean-ups and minor bug fixes.  Some of
  them refactor old code too"

* tag 'for-f2fs-3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (64 commits)
  f2fs: use spinlock for segmap_lock instead of rwlock
  f2fs: fix accessing wrong indexed data blocks
  f2fs: avoid variable length array
  f2fs: fix sparse warnings
  f2fs: allocate data blocks in advance for f2fs_direct_IO
  f2fs: introduce macros to convert bytes and blocks in f2fs
  f2fs: call set_buffer_new for get_block
  f2fs: check node page contents all the time
  f2fs: avoid data offset overflow when lseeking huge file
  f2fs: fix to use highmem for pages of newly created directory
  f2fs: introduce a batched trim
  f2fs: merge {invalidate,release}page for meta/node/data pages
  f2fs: show the number of writeback pages in stat
  f2fs: keep PagePrivate during releasepage
  f2fs: should fail mount when trying to recover data on read-only dev
  f2fs: split UMOUNT and FASTBOOT flags
  f2fs: avoid write_checkpoint if f2fs is mounted readonly
  f2fs: support norecovery mount option
  f2fs: fix not to drop mount options when retrying fill_super
  f2fs: merge flags in struct f2fs_sb_info
  ...
This commit is contained in:
Linus Torvalds 2015-02-12 19:28:50 -08:00
commit c7d7b98671
26 changed files with 1076 additions and 560 deletions

View File

@ -74,3 +74,9 @@ Date: March 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the memory footprint used by f2fs.
What: /sys/fs/f2fs/<disk>/trim_sections
Date: February 2015
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the trimming rate in batch mode.

View File

@ -106,6 +106,8 @@ background_gc=%s Turn on/off cleaning operations, namely garbage
Default value for this option is on. So garbage
collection is on by default.
disable_roll_forward Disable the roll-forward recovery routine
norecovery Disable the roll-forward recovery routine, mounted read-
only (i.e., -o ro,disable_roll_forward)
discard Issue discard/TRIM commands when a segment is cleaned.
no_heap Disable heap-style segment allocation which finds free
segments for data from the beginning of main area, while
@ -197,6 +199,10 @@ Files in /sys/fs/f2fs/<devname>
checkpoint is triggered, and issued during the
checkpoint. By default, it is disabled with 0.
trim_sections This parameter controls the number of sections
to be trimmed out in batch mode when FITRIM
conducts. 32 sections is set by default.
ipu_policy This parameter controls the policy of in-place
updates in f2fs. There are five policies:
0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR,

View File

@ -71,3 +71,13 @@ config F2FS_CHECK_FS
Enables BUG_ONs which check the filesystem consistency in runtime.
If you want to improve the performance, say N.
config F2FS_IO_TRACE
bool "F2FS IO tracer"
depends on F2FS_FS
depends on FUNCTION_TRACER
help
F2FS IO trace is based on a function trace, which gathers process
information and block IO patterns in the filesystem level.
If unsure, say N.

View File

@ -5,3 +5,4 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o

View File

@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
if (count == 0)
return NULL;
acl = posix_acl_alloc(count, GFP_KERNEL);
acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
int i;
f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
sizeof(struct f2fs_acl_entry), GFP_KERNEL);
sizeof(struct f2fs_acl_entry), GFP_NOFS);
if (!f2fs_acl)
return ERR_PTR(-ENOMEM);
@ -396,7 +396,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
posix_acl_release(default_acl);
}
if (acl) {
if (error)
if (!error)
error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
ipage);
posix_acl_release(acl);

View File

@ -20,10 +20,11 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>
static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;
struct kmem_cache *inode_entry_slab;
/*
* We guarantee no failure on the returned page.
@ -50,6 +51,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO,
.blk_addr = index,
};
repeat:
page = grab_cache_page(mapping, index);
if (!page) {
@ -59,8 +65,7 @@ repeat:
if (PageUptodate(page))
goto out;
if (f2fs_submit_page_bio(sbi, page, index,
READ_SYNC | REQ_META | REQ_PRIO))
if (f2fs_submit_page_bio(sbi, page, &fio))
goto repeat;
lock_page(page);
@ -112,14 +117,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
block_t prev_blk_addr = 0;
struct page *page;
block_t blkno = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
for (; nrpages-- > 0; blkno++) {
block_t blk_addr;
if (!is_valid_blkaddr(sbi, blkno, type))
goto out;
@ -130,27 +133,27 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
blkno = 0;
/* get nat block addr */
blk_addr = current_nat_addr(sbi,
fio.blk_addr = current_nat_addr(sbi,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
/* get sit block addr */
blk_addr = current_sit_addr(sbi,
fio.blk_addr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
if (blkno != start && prev_blk_addr + 1 != blk_addr)
if (blkno != start && prev_blk_addr + 1 != fio.blk_addr)
goto out;
prev_blk_addr = blk_addr;
prev_blk_addr = fio.blk_addr;
break;
case META_SSA:
case META_CP:
case META_POR:
blk_addr = blkno;
fio.blk_addr = blkno;
break;
default:
BUG();
}
page = grab_cache_page(META_MAPPING(sbi), blk_addr);
page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr);
if (!page)
continue;
if (PageUptodate(page)) {
@ -158,7 +161,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
continue;
}
f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
f2fs_submit_page_mbio(sbi, page, &fio);
f2fs_put_page(page, 0);
}
out:
@ -187,7 +190,7 @@ static int f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
if (unlikely(sbi->por_doing))
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
goto redirty_out;
@ -299,6 +302,8 @@ static int f2fs_set_meta_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
SetPagePrivate(page);
f2fs_trace_pid(page);
return 1;
}
return 0;
@ -308,6 +313,8 @@ const struct address_space_operations f2fs_meta_aops = {
.writepage = f2fs_write_meta_page,
.writepages = f2fs_write_meta_pages,
.set_page_dirty = f2fs_set_meta_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
};
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@ -462,7 +469,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
return;
sbi->por_doing = true;
set_sbi_flag(sbi, SBI_POR_DOING);
start_blk = __start_cp_addr(sbi) + 1 +
le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
@ -483,7 +490,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
}
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
sbi->por_doing = false;
clear_sbi_flag(sbi, SBI_POR_DOING);
return;
}
@ -567,7 +574,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (crc_offset >= blk_size)
goto invalid_cp1;
crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp1;
@ -582,7 +589,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (crc_offset >= blk_size)
goto invalid_cp2;
crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
if (!f2fs_crc_valid(crc, cp_block, crc_offset))
goto invalid_cp2;
@ -669,7 +676,7 @@ fail_no_cp:
return -EINVAL;
}
static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@ -686,7 +693,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
void update_dirty_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dir_inode_entry *new;
struct inode_entry *new;
int ret = 0;
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
@ -710,12 +717,13 @@ void update_dirty_page(struct inode *inode, struct page *page)
kmem_cache_free(inode_entry_slab, new);
out:
SetPagePrivate(page);
f2fs_trace_pid(page);
}
void add_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dir_inode_entry *new =
struct inode_entry *new =
f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
int ret = 0;
@ -733,7 +741,7 @@ void add_dirty_dir_inode(struct inode *inode)
void remove_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dir_inode_entry *entry;
struct inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
return;
@ -763,7 +771,7 @@ void remove_dirty_dir_inode(struct inode *inode)
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
struct list_head *head;
struct dir_inode_entry *entry;
struct inode_entry *entry;
struct inode *inode;
retry:
if (unlikely(f2fs_cp_error(sbi)))
@ -776,7 +784,7 @@ retry:
spin_unlock(&sbi->dir_inode_lock);
return;
}
entry = list_entry(head->next, struct dir_inode_entry, list);
entry = list_entry(head->next, struct inode_entry, list);
inode = igrab(entry->inode);
spin_unlock(&sbi->dir_inode_lock);
if (inode) {
@ -922,7 +930,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->next_free_nid = cpu_to_le32(last_nid);
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi);
data_sum_blocks = npages_for_summary_flush(sbi, false);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
@ -932,24 +940,31 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
orphan_blocks);
if (cpc->reason == CP_UMOUNT) {
set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
if (__remain_node_summaries(cpc->reason))
ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
cp_payload_blks + data_sum_blocks +
orphan_blocks + NR_CURSEG_NODE_TYPE);
} else {
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
cp_payload_blks + data_sum_blocks +
orphan_blocks);
}
if (cpc->reason == CP_UMOUNT)
set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
if (cpc->reason == CP_FASTBOOT)
set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
if (orphan_num)
set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
if (sbi->need_fsck)
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
set_ckpt_flags(ckpt, CP_FSCK_FLAG);
/* update SIT/NAT bitmap */
@ -966,15 +981,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* write out checkpoint buffer at block 0 */
cp_page = grab_meta_page(sbi, start_blk++);
kaddr = page_address(cp_page);
memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
memcpy(kaddr, ckpt, F2FS_BLKSIZE);
set_page_dirty(cp_page);
f2fs_put_page(cp_page, 1);
for (i = 1; i < 1 + cp_payload_blks; i++) {
cp_page = grab_meta_page(sbi, start_blk++);
kaddr = page_address(cp_page);
memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
(1 << sbi->log_blocksize));
memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE);
set_page_dirty(cp_page);
f2fs_put_page(cp_page, 1);
}
@ -986,7 +1000,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
write_data_summaries(sbi, start_blk);
start_blk += data_sum_blocks;
if (cpc->reason == CP_UMOUNT) {
if (__remain_node_summaries(cpc->reason)) {
write_node_summaries(sbi, start_blk);
start_blk += NR_CURSEG_NODE_TYPE;
}
@ -994,7 +1008,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* writeout checkpoint block */
cp_page = grab_meta_page(sbi, start_blk);
kaddr = page_address(cp_page);
memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
memcpy(kaddr, ckpt, F2FS_BLKSIZE);
set_page_dirty(cp_page);
f2fs_put_page(cp_page, 1);
@ -1023,7 +1037,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return;
clear_prefree_segments(sbi);
F2FS_RESET_SB_DIRT(sbi);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
}
/*
@ -1038,10 +1052,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
mutex_lock(&sbi->cp_mutex);
if (!sbi->s_dirty && cpc->reason != CP_DISCARD)
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT)
goto out;
if (unlikely(f2fs_cp_error(sbi)))
goto out;
if (f2fs_readonly(sbi->sb))
goto out;
if (block_operations(sbi))
goto out;
@ -1102,8 +1119,8 @@ int __init create_checkpoint_caches(void)
sizeof(struct ino_entry));
if (!ino_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry));
inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
sizeof(struct inode_entry));
if (!inode_entry_slab) {
kmem_cache_destroy(ino_entry_slab);
return -ENOMEM;

View File

@ -22,6 +22,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>
static void f2fs_read_end_io(struct bio *bio, int err)
@ -95,11 +96,9 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
return;
if (is_read_io(fio->rw))
trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw,
fio->type, io->bio);
trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
else
trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw,
fio->type, io->bio);
trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
submit_bio(fio->rw, io->bio);
io->bio = NULL;
@ -132,14 +131,15 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
* Return unlocked page.
*/
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
block_t blk_addr, int rw)
struct f2fs_io_info *fio)
{
struct bio *bio;
trace_f2fs_submit_page_bio(page, blk_addr, rw);
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(page, fio, 0);
/* Allocate a new bio */
bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw));
if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
bio_put(bio);
@ -147,12 +147,12 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
return -EFAULT;
}
submit_bio(rw, bio);
submit_bio(fio->rw, bio);
return 0;
}
void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
block_t blk_addr, struct f2fs_io_info *fio)
struct f2fs_io_info *fio)
{
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
@ -160,21 +160,21 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
verify_block_addr(sbi, blk_addr);
verify_block_addr(sbi, fio->blk_addr);
down_write(&io->io_rwsem);
if (!is_read)
inc_page_count(sbi, F2FS_WRITEBACK);
if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
io->fio.rw != fio->rw))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
int bio_blocks = MAX_BIO_BLOCKS(sbi);
io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
io->fio = *fio;
}
@ -184,10 +184,11 @@ alloc_new:
goto alloc_new;
}
io->last_block_in_bio = blk_addr;
io->last_block_in_bio = fio->blk_addr;
f2fs_trace_ios(page, fio, 0);
up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
trace_f2fs_submit_page_mbio(page, fio);
}
/*
@ -196,7 +197,7 @@ alloc_new:
* ->node_page
* update block addresses in the node page
*/
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
struct f2fs_node *rn;
__le32 *addr_array;
@ -209,7 +210,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
addr_array[ofs_in_node] = cpu_to_le32(new_addr);
addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
set_page_dirty(node_page);
}
@ -224,8 +225,8 @@ int reserve_new_block(struct dnode_of_data *dn)
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
__set_data_blkaddr(dn, NEW_ADDR);
dn->data_blkaddr = NEW_ADDR;
__set_data_blkaddr(dn);
mark_inode_dirty(dn->inode);
sync_inode_page(dn);
return 0;
@ -273,7 +274,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
unsigned int blkbits = inode->i_sb->s_blocksize_bits;
size_t count;
clear_buffer_new(bh_result);
set_buffer_new(bh_result);
map_bh(bh_result, inode->i_sb,
start_blkaddr + pgofs - start_fofs);
count = end_fofs - pgofs + 1;
@ -290,23 +291,24 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
return 0;
}
void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
void update_extent_cache(struct dnode_of_data *dn)
{
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs, start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
int need_update = true;
f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
/* Update the page address in the parent node */
__set_data_blkaddr(dn, blk_addr);
__set_data_blkaddr(dn);
if (is_inode_flag_set(fi, FI_NO_EXTENT))
return;
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
write_lock(&fi->ext.ext_lock);
start_fofs = fi->ext.fofs;
@ -320,16 +322,16 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
/* Initial extent */
if (fi->ext.len == 0) {
if (blk_addr != NULL_ADDR) {
if (dn->data_blkaddr != NULL_ADDR) {
fi->ext.fofs = fofs;
fi->ext.blk_addr = blk_addr;
fi->ext.blk_addr = dn->data_blkaddr;
fi->ext.len = 1;
}
goto end_update;
}
/* Front merge */
if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) {
fi->ext.fofs--;
fi->ext.blk_addr--;
fi->ext.len++;
@ -337,7 +339,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
}
/* Back merge */
if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) {
fi->ext.len++;
goto end_update;
}
@ -376,6 +378,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
struct dnode_of_data dn;
struct page *page;
int err;
struct f2fs_io_info fio = {
.type = DATA,
.rw = sync ? READ_SYNC : READA,
};
page = find_get_page(mapping, index);
if (page && PageUptodate(page))
@ -404,8 +410,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
return page;
}
err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr,
sync ? READ_SYNC : READA);
fio.blk_addr = dn.data_blkaddr;
err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
if (err)
return ERR_PTR(err);
@ -430,7 +436,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
struct dnode_of_data dn;
struct page *page;
int err;
struct f2fs_io_info fio = {
.type = DATA,
.rw = READ_SYNC,
};
repeat:
page = grab_cache_page(mapping, index);
if (!page)
@ -464,8 +473,8 @@ repeat:
return page;
}
err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
dn.data_blkaddr, READ_SYNC);
fio.blk_addr = dn.data_blkaddr;
err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
if (err)
return ERR_PTR(err);
@ -515,8 +524,12 @@ repeat:
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else {
err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
dn.data_blkaddr, READ_SYNC);
struct f2fs_io_info fio = {
.type = DATA,
.rw = READ_SYNC,
.blk_addr = dn.data_blkaddr,
};
err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
if (err)
goto put_err;
@ -550,30 +563,25 @@ static int __allocate_data_block(struct dnode_of_data *dn)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
struct f2fs_summary sum;
block_t new_blkaddr;
struct node_info ni;
int seg = CURSEG_WARM_DATA;
pgoff_t fofs;
int type;
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
__set_data_blkaddr(dn, NEW_ADDR);
dn->data_blkaddr = NEW_ADDR;
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
type = CURSEG_WARM_DATA;
if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
seg = CURSEG_DIRECT_IO;
allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg);
/* direct IO doesn't use extent cache to maximize the performance */
set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
update_extent_cache(new_blkaddr, dn);
clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
__set_data_blkaddr(dn);
/* update i_size */
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@ -581,10 +589,59 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
dn->data_blkaddr = new_blkaddr;
return 0;
}
static void __allocate_data_blocks(struct inode *inode, loff_t offset,
size_t count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
u64 start = F2FS_BYTES_TO_BLK(offset);
u64 len = F2FS_BYTES_TO_BLK(count);
bool allocated;
u64 end_offset;
while (len) {
f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
if (get_dnode_of_data(&dn, start, ALLOC_NODE))
goto out;
allocated = false;
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
while (dn.ofs_in_node < end_offset && len) {
if (dn.data_blkaddr == NULL_ADDR) {
if (__allocate_data_block(&dn))
goto sync_out;
allocated = true;
}
len--;
start++;
dn.ofs_in_node++;
}
if (allocated)
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
}
return;
sync_out:
if (allocated)
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
out:
f2fs_unlock_op(sbi);
return;
}
/*
* get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
* If original data blocks are allocated, then give them to blockdev.
@ -610,10 +667,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
if (check_extent_cache(inode, pgofs, bh_result))
goto out;
if (create) {
f2fs_balance_fs(F2FS_I_SB(inode));
if (create)
f2fs_lock_op(F2FS_I_SB(inode));
}
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@ -627,12 +682,14 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
goto put_out;
if (dn.data_blkaddr != NULL_ADDR) {
set_buffer_new(bh_result);
map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
} else if (create) {
err = __allocate_data_block(&dn);
if (err)
goto put_out;
allocated = true;
set_buffer_new(bh_result);
map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
} else {
goto put_out;
@ -745,7 +802,6 @@ static int f2fs_read_data_pages(struct file *file,
int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
struct inode *inode = page->mapping->host;
block_t old_blkaddr, new_blkaddr;
struct dnode_of_data dn;
int err = 0;
@ -754,10 +810,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
if (err)
return err;
old_blkaddr = dn.data_blkaddr;
fio->blk_addr = dn.data_blkaddr;
/* This page is already truncated */
if (old_blkaddr == NULL_ADDR)
if (fio->blk_addr == NULL_ADDR)
goto out_writepage;
set_page_writeback(page);
@ -766,14 +822,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
if (unlikely(old_blkaddr != NEW_ADDR &&
if (unlikely(fio->blk_addr != NEW_ADDR &&
!is_cold_data(page) &&
need_inplace_update(inode))) {
rewrite_data_page(page, old_blkaddr, fio);
rewrite_data_page(page, fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
} else {
write_data_page(page, &dn, &new_blkaddr, fio);
update_extent_cache(new_blkaddr, &dn);
write_data_page(page, &dn, fio);
update_extent_cache(&dn);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
}
out_writepage:
@ -812,7 +868,12 @@ static int f2fs_write_data_page(struct page *page,
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
if (unlikely(sbi->por_doing))
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (f2fs_is_drop_cache(inode))
goto out;
if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
available_free_memory(sbi, BASE_CHECK))
goto redirty_out;
/* Dentry blocks are controlled by checkpoint */
@ -826,7 +887,6 @@ write:
/* we should bypass data pages to proceed the kworkder jobs */
if (unlikely(f2fs_cp_error(sbi))) {
SetPageError(page);
unlock_page(page);
goto out;
}
@ -1002,8 +1062,12 @@ put_next:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
READ_SYNC);
struct f2fs_io_info fio = {
.type = DATA,
.rw = READ_SYNC,
.blk_addr = dn.data_blkaddr,
};
err = f2fs_submit_page_bio(sbi, page, &fio);
if (err)
goto fail;
@ -1092,6 +1156,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
trace_f2fs_direct_IO_enter(inode, offset, count, rw);
if (rw & WRITE)
__allocate_data_blocks(inode, offset, count);
err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
if (err < 0 && (rw & WRITE))
f2fs_write_failed(mapping, offset + count);
@ -1101,24 +1168,33 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
return err;
}
static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
struct inode *inode = page->mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
(offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
return;
if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
invalidate_inmem_page(inode, page);
if (PageDirty(page))
if (PageDirty(page)) {
if (inode->i_ino == F2FS_META_INO(sbi))
dec_page_count(sbi, F2FS_DIRTY_META);
else if (inode->i_ino == F2FS_NODE_INO(sbi))
dec_page_count(sbi, F2FS_DIRTY_NODES);
else
inode_dec_dirty_pages(inode);
}
ClearPagePrivate(page);
}
static int f2fs_release_data_page(struct page *page, gfp_t wait)
int f2fs_release_page(struct page *page, gfp_t wait)
{
/* If this is dirty page, keep PagePrivate */
if (PageDirty(page))
return 0;
ClearPagePrivate(page);
return 1;
}
@ -1132,7 +1208,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
SetPageUptodate(page);
if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) {
if (f2fs_is_atomic_file(inode)) {
register_inmem_page(inode, page);
return 1;
}
@ -1168,8 +1244,8 @@ const struct address_space_operations f2fs_dblock_aops = {
.write_begin = f2fs_write_begin,
.write_end = f2fs_write_end,
.set_page_dirty = f2fs_set_data_page_dirty,
.invalidatepage = f2fs_invalidate_data_page,
.releasepage = f2fs_release_data_page,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
.direct_IO = f2fs_direct_IO,
.bmap = f2fs_bmap,
};

View File

@ -40,6 +40,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_dirs = sbi->n_dirty_dirs;
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@ -57,7 +58,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->node_pages = NODE_MAPPING(sbi)->nrpages;
si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->sits = SIT_I(sbi)->dirty_sentries;
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->fnids = NM_I(sbi)->fcnt;
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
@ -79,6 +82,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->segment_count[i] = sbi->segment_count[i];
si->block_count[i] = sbi->block_count[i];
}
si->inplace_count = atomic_read(&sbi->inplace_count);
}
/*
@ -137,6 +142,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
si->base_mem += SIT_VBLOCK_MAP_SIZE;
if (sbi->segs_per_sec > 1)
si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
@ -159,20 +165,32 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
/* build gc */
si->base_mem += sizeof(struct f2fs_gc_kthread);
get_cache:
si->cache_mem = 0;
/* build gc */
if (sbi->gc_thread)
si->cache_mem += sizeof(struct f2fs_gc_kthread);
/* build merge flush thread */
if (SM_I(sbi)->cmd_control_info)
si->cache_mem += sizeof(struct flush_cmd_control);
/* free nids */
si->cache_mem = NM_I(sbi)->fcnt;
si->cache_mem += NM_I(sbi)->nat_cnt;
npages = NODE_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
for (i = 0; i <= UPDATE_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->page_mem = 0;
npages = NODE_MAPPING(sbi)->nrpages;
si->page_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->page_mem += npages << PAGE_CACHE_SHIFT;
}
static int stat_show(struct seq_file *s, void *v)
@ -250,16 +268,16 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d\n",
si->inmem_pages);
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - NATs: %9d\n - SITs: %9d\n",
si->nats, si->sits);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d\n",
si->fnids);
seq_puts(s, "\nDistribution of User Blocks:");
@ -277,6 +295,7 @@ static int stat_show(struct seq_file *s, void *v)
for (j = 0; j < si->util_free; j++)
seq_putc(s, '-');
seq_puts(s, "]\n\n");
seq_printf(s, "IPU: %u blocks\n", si->inplace_count);
seq_printf(s, "SSR: %u blocks in %u segments\n",
si->block_count[SSR], si->segment_count[SSR]);
seq_printf(s, "LFS: %u blocks in %u segments\n",
@ -289,9 +308,14 @@ static int stat_show(struct seq_file *s, void *v)
/* memory footprint */
update_mem_info(si->sbi);
seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
(si->base_mem + si->cache_mem) >> 10,
si->base_mem >> 10, si->cache_mem >> 10);
seq_printf(s, "\nMemory: %u KB\n",
(si->base_mem + si->cache_mem + si->page_mem) >> 10);
seq_printf(s, " - static: %u KB\n",
si->base_mem >> 10);
seq_printf(s, " - cached: %u KB\n",
si->cache_mem >> 10);
seq_printf(s, " - paged : %u KB\n",
si->page_mem >> 10);
}
mutex_unlock(&f2fs_stat_mutex);
return 0;
@ -331,6 +355,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);

View File

@ -286,8 +286,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
f2fs_wait_on_page_writeback(page, type);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode);
if (!f2fs_has_inline_dentry(dir))
kunmap(page);
f2fs_dentry_kunmap(dir, page);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
mark_inode_dirty(dir);

View File

@ -28,7 +28,7 @@
do { \
if (unlikely(condition)) { \
WARN_ON(1); \
sbi->need_fsck = true; \
set_sbi_flag(sbi, SBI_NEED_FSCK); \
} \
} while (0)
#define f2fs_down_write(x, y) down_write(x)
@ -100,10 +100,15 @@ enum {
enum {
CP_UMOUNT,
CP_FASTBOOT,
CP_SYNC,
CP_DISCARD,
};
#define DEF_BATCHED_TRIM_SECTIONS 32
#define BATCHED_TRIM_SEGMENTS(sbi) \
(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
struct cp_control {
int reason;
__u64 trim_start;
@ -136,8 +141,14 @@ struct ino_entry {
nid_t ino; /* inode number */
};
/* for the list of directory inodes */
struct dir_inode_entry {
/*
* for the list of directory inodes or gc inodes.
* NOTE: there are two slab users for this structure, if we add/modify/delete
* fields in structure for one of slab users, it may affect fields or size of
* other one, in this condition, it's better to split both of slab and related
* data structure.
*/
struct inode_entry {
struct list_head list; /* list head */
struct inode *inode; /* vfs inode pointer */
};
@ -196,11 +207,14 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
*/
#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION
#define F2FS_IOCTL_MAGIC 0xf5
#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@ -295,7 +309,7 @@ struct f2fs_inode_info {
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
struct inode_entry *dirty_dir; /* the pointer of dirty dir */
struct radix_tree_root inmem_root; /* radix tree for inmem pages */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
@ -398,7 +412,8 @@ enum {
CURSEG_HOT_NODE, /* direct node blocks of directory files */
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
NO_CHECK_TYPE
NO_CHECK_TYPE,
CURSEG_DIRECT_IO, /* to use for the direct IO path */
};
struct flush_cmd {
@ -437,6 +452,9 @@ struct f2fs_sm_info {
int nr_discards; /* # of discards in the list */
int max_discards; /* max. discards to be issued */
/* for batched trimming */
unsigned int trim_sections; /* # of sections to trim */
struct list_head sit_entry_set; /* sit entry set list */
unsigned int ipu_policy; /* in-place-update policy */
@ -489,6 +507,7 @@ enum page_type {
struct f2fs_io_info {
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
block_t blk_addr; /* block address to be written */
};
#define is_read_io(rw) (((rw) & 1) == READ)
@ -508,13 +527,20 @@ struct inode_management {
unsigned long ino_num; /* number of entries */
};
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
SBI_IS_CLOSE, /* specify unmounting */
SBI_NEED_FSCK, /* need fsck.f2fs to fix */
SBI_POR_DOING, /* recovery is doing or not */
};
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
struct buffer_head *raw_super_buf; /* buffer head of raw sb */
struct f2fs_super_block *raw_super; /* raw super block pointer */
int s_dirty; /* dirty flag for checkpoint */
bool need_fsck; /* need fsck.f2fs to fix */
int s_flag; /* flags for sbi */
/* for node-related operations */
struct f2fs_nm_info *nm_info; /* node manager */
@ -534,7 +560,6 @@ struct f2fs_sb_info {
struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
bool por_doing; /* recovery is doing or not */
wait_queue_head_t cp_wait;
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@ -589,6 +614,7 @@ struct f2fs_sb_info {
struct f2fs_stat_info *stat_info; /* FS status information */
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
@ -686,14 +712,19 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
return sbi->node_inode->i_mapping;
}
static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type)
{
sbi->s_dirty = 1;
return sbi->s_flag & (0x01 << type);
}
static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
{
sbi->s_dirty = 0;
sbi->s_flag |= (0x01 << type);
}
static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
{
sbi->s_flag &= ~(0x01 << type);
}
static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
@ -741,6 +772,28 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
up_write(&sbi->cp_rwsem);
}
static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
{
int reason = CP_SYNC;
if (test_opt(sbi, FASTBOOT))
reason = CP_FASTBOOT;
if (is_sbi_flag_set(sbi, SBI_IS_CLOSE))
reason = CP_UMOUNT;
return reason;
}
static inline bool __remain_node_summaries(int reason)
{
return (reason == CP_UMOUNT || reason == CP_FASTBOOT);
}
static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
{
return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) ||
is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG));
}
/*
* Check whether the given nid is within node id range.
*/
@ -805,7 +858,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
{
atomic_inc(&sbi->nr_pages[count_type]);
F2FS_SET_SB_DIRT(sbi);
set_sbi_flag(sbi, SBI_IS_DIRTY);
}
static inline void inode_inc_dirty_pages(struct inode *inode)
@ -1113,6 +1166,7 @@ enum {
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
FI_VOLATILE_FILE, /* indicate volatile file */
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
};
@ -1220,6 +1274,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode)
return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
}
static inline bool f2fs_is_drop_cache(struct inode *inode)
{
return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE);
}
static inline void *inline_data_addr(struct page *page)
{
struct f2fs_inode *ri = F2FS_INODE(page);
@ -1389,7 +1448,6 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void register_inmem_page(struct inode *, struct page *);
void invalidate_inmem_page(struct inode *, struct page *);
void commit_inmem_pages(struct inode *, bool);
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
@ -1401,16 +1459,16 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
void release_discard_addrs(struct f2fs_sb_info *);
void discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *);
int npages_for_summary_flush(struct f2fs_sb_info *, bool);
void allocate_new_segments(struct f2fs_sb_info *);
int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
void write_meta_page(struct f2fs_sb_info *, struct page *);
void write_node_page(struct f2fs_sb_info *, struct page *,
struct f2fs_io_info *, unsigned int, block_t, block_t *);
void write_data_page(struct page *, struct dnode_of_data *, block_t *,
unsigned int, struct f2fs_io_info *);
void write_data_page(struct page *, struct dnode_of_data *,
struct f2fs_io_info *);
void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
void rewrite_data_page(struct page *, struct f2fs_io_info *);
void recover_data_page(struct f2fs_sb_info *, struct page *,
struct f2fs_summary *, block_t, block_t);
void allocate_data_block(struct f2fs_sb_info *, struct page *,
@ -1457,17 +1515,20 @@ void destroy_checkpoint_caches(void);
* data.c
*/
void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int);
void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t,
int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
struct f2fs_io_info *);
int reserve_new_block(struct dnode_of_data *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
void update_extent_cache(block_t, struct dnode_of_data *);
void update_extent_cache(struct dnode_of_data *);
struct page *find_data_page(struct inode *, pgoff_t, bool);
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct page *, struct f2fs_io_info *);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t);
/*
* gc.c
@ -1477,8 +1538,6 @@ void stop_gc_thread(struct f2fs_sb_info *);
block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
int f2fs_gc(struct f2fs_sb_info *);
void build_gc_manager(struct f2fs_sb_info *);
int __init create_gc_caches(void);
void destroy_gc_caches(void);
/*
* recovery.c
@ -1497,9 +1556,9 @@ struct f2fs_stat_info {
int main_area_segs, main_area_sections, main_area_zones;
int hit_ext, total_ext;
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, sits, fnids;
int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization;
int bg_gc, inline_inode, inline_dir, inmem_pages;
int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages;
unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
@ -1514,7 +1573,8 @@ struct f2fs_stat_info {
unsigned int segment_count[2];
unsigned int block_count[2];
unsigned base_mem, cache_mem;
unsigned int inplace_count;
unsigned base_mem, cache_mem, page_mem;
};
static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
@ -1553,7 +1613,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
((sbi)->segment_count[(curseg)->alloc_type]++)
#define stat_inc_block_count(sbi, curseg) \
((sbi)->block_count[(curseg)->alloc_type]++)
#define stat_inc_inplace_blocks(sbi) \
(atomic_inc(&(sbi)->inplace_count))
#define stat_inc_seg_count(sbi, type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
@ -1599,6 +1660,7 @@ void f2fs_destroy_root_stats(void);
#define stat_dec_inline_dir(inode)
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_inplace_blocks(sbi)
#define stat_inc_seg_count(si, type)
#define stat_inc_tot_blk_count(si, blks)
#define stat_inc_data_blk_count(si, blks)
@ -1619,6 +1681,7 @@ extern const struct address_space_operations f2fs_meta_aops;
extern const struct inode_operations f2fs_dir_inode_operations;
extern const struct inode_operations f2fs_symlink_inode_operations;
extern const struct inode_operations f2fs_special_inode_operations;
extern struct kmem_cache *inode_entry_slab;
/*
* inline.c
@ -1629,7 +1692,6 @@ int f2fs_read_inline_data(struct inode *, struct page *);
int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
int f2fs_convert_inline_inode(struct inode *);
int f2fs_write_inline_data(struct inode *, struct page *);
void truncate_inline_data(struct page *, u64);
bool recover_inline_data(struct inode *, struct page *);
struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
struct page **);

View File

@ -26,6 +26,7 @@
#include "segment.h"
#include "xattr.h"
#include "acl.h"
#include "trace.h"
#include <trace/events/f2fs.h>
static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
@ -245,6 +246,10 @@ go_write:
sync_nodes:
sync_node_pages(sbi, ino, &wbc);
/* if cp_error was enabled, we should avoid infinite loop */
if (unlikely(f2fs_cp_error(sbi)))
goto out;
if (need_inode_block_update(sbi, ino)) {
mark_inode_dirty_sync(inode);
f2fs_write_inode(inode, NULL);
@ -264,6 +269,7 @@ flush_out:
ret = f2fs_issue_flush(sbi);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
f2fs_trace_ios(NULL, NULL, 1);
return ret;
}
@ -350,7 +356,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
/* find data/hole in dnode block */
for (; dn.ofs_in_node < end_offset;
dn.ofs_in_node++, pgofs++,
data_ofs = pgofs << PAGE_CACHE_SHIFT) {
data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
block_t blkaddr;
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
@ -426,7 +432,8 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
if (blkaddr == NULL_ADDR)
continue;
update_extent_cache(NULL_ADDR, dn);
dn->data_blkaddr = NULL_ADDR;
update_extent_cache(dn);
invalidate_blocks(sbi, blkaddr);
nr_free++;
}
@ -483,8 +490,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
trace_f2fs_truncate_blocks_enter(inode, from);
free_from = (pgoff_t)
((from + blocksize - 1) >> (sbi->log_blocksize));
free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);
if (lock)
f2fs_lock_op(sbi);
@ -835,6 +841,19 @@ static long f2fs_fallocate(struct file *file, int mode,
return ret;
}
static int f2fs_release_file(struct inode *inode, struct file *filp)
{
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
commit_inmem_pages(inode, true);
if (f2fs_is_volatile_file(inode)) {
set_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
filemap_fdatawrite(inode->i_mapping);
clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
}
return 0;
}
#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
@ -905,29 +924,30 @@ out:
return ret;
}
static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
return put_user(inode->i_generation, (int __user *)arg);
}
static int f2fs_ioc_start_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
if (!inode_owner_or_capable(inode))
return -EACCES;
f2fs_balance_fs(sbi);
f2fs_balance_fs(F2FS_I_SB(inode));
if (f2fs_is_atomic_file(inode))
return 0;
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
return f2fs_convert_inline_inode(inode);
}
static int f2fs_release_file(struct inode *inode, struct file *filp)
{
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
commit_inmem_pages(inode, true);
return 0;
}
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
@ -948,6 +968,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
mnt_drop_write_file(filp);
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
return ret;
}
@ -958,11 +979,56 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
if (f2fs_is_volatile_file(inode))
return 0;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
return f2fs_convert_inline_inode(inode);
}
static int f2fs_ioc_release_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!f2fs_is_volatile_file(inode))
return 0;
punch_hole(inode, 0, F2FS_BLKSIZE);
return 0;
}
static int f2fs_ioc_abort_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
f2fs_balance_fs(F2FS_I_SB(inode));
if (f2fs_is_atomic_file(inode)) {
commit_inmem_pages(inode, false);
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
}
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
filemap_fdatawrite(inode->i_mapping);
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
}
mnt_drop_write_file(filp);
return ret;
}
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@ -1000,12 +1066,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_getflags(filp, arg);
case F2FS_IOC_SETFLAGS:
return f2fs_ioc_setflags(filp, arg);
case F2FS_IOC_GETVERSION:
return f2fs_ioc_getversion(filp, arg);
case F2FS_IOC_START_ATOMIC_WRITE:
return f2fs_ioc_start_atomic_write(filp);
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
return f2fs_ioc_commit_atomic_write(filp);
case F2FS_IOC_START_VOLATILE_WRITE:
return f2fs_ioc_start_volatile_write(filp);
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
return f2fs_ioc_release_volatile_write(filp);
case F2FS_IOC_ABORT_VOLATILE_WRITE:
return f2fs_ioc_abort_volatile_write(filp);
case FITRIM:
return f2fs_ioc_fitrim(filp, arg);
default:

View File

@ -24,8 +24,6 @@
#include "gc.h"
#include <trace/events/f2fs.h>
static struct kmem_cache *winode_slab;
static int gc_thread_func(void *data)
{
struct f2fs_sb_info *sbi = data;
@ -46,7 +44,7 @@ static int gc_thread_func(void *data)
break;
if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
wait_ms = increase_sleep_time(gc_th, wait_ms);
increase_sleep_time(gc_th, &wait_ms);
continue;
}
@ -67,15 +65,15 @@ static int gc_thread_func(void *data)
continue;
if (!is_idle(sbi)) {
wait_ms = increase_sleep_time(gc_th, wait_ms);
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
continue;
}
if (has_enough_invalid_blocks(sbi))
wait_ms = decrease_sleep_time(gc_th, wait_ms);
decrease_sleep_time(gc_th, &wait_ms);
else
wait_ms = increase_sleep_time(gc_th, wait_ms);
increase_sleep_time(gc_th, &wait_ms);
stat_inc_bggc_count(sbi);
@ -356,13 +354,10 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
iput(inode);
return;
}
new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
new_ie->inode = inode;
retry:
if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) {
cond_resched();
goto retry;
}
f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
list_add_tail(&new_ie->list, &gc_list->ilist);
}
@ -373,7 +368,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list)
radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
iput(ie->inode);
list_del(&ie->list);
kmem_cache_free(winode_slab, ie);
kmem_cache_free(inode_entry_slab, ie);
}
}
@ -703,8 +698,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
cpc.reason = __get_cp_reason(sbi);
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
@ -750,17 +744,3 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
{
DIRTY_I(sbi)->v_ops = &default_v_ops;
}
int __init create_gc_caches(void)
{
winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
sizeof(struct inode_entry));
if (!winode_slab)
return -ENOMEM;
return 0;
}
void destroy_gc_caches(void)
{
kmem_cache_destroy(winode_slab);
}

View File

@ -35,11 +35,6 @@ struct f2fs_gc_kthread {
unsigned int gc_idle;
};
struct inode_entry {
struct list_head list;
struct inode *inode;
};
struct gc_inode_list {
struct list_head ilist;
struct radix_tree_root iroot;
@ -69,26 +64,26 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
}
static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait)
static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th,
long *wait)
{
if (wait == gc_th->no_gc_sleep_time)
return wait;
if (*wait == gc_th->no_gc_sleep_time)
return;
wait += gc_th->min_sleep_time;
if (wait > gc_th->max_sleep_time)
wait = gc_th->max_sleep_time;
return wait;
*wait += gc_th->min_sleep_time;
if (*wait > gc_th->max_sleep_time)
*wait = gc_th->max_sleep_time;
}
static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait)
static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
long *wait)
{
if (wait == gc_th->no_gc_sleep_time)
wait = gc_th->max_sleep_time;
if (*wait == gc_th->no_gc_sleep_time)
*wait = gc_th->max_sleep_time;
wait -= gc_th->min_sleep_time;
if (wait <= gc_th->min_sleep_time)
wait = gc_th->min_sleep_time;
return wait;
*wait -= gc_th->min_sleep_time;
if (*wait <= gc_th->min_sleep_time)
*wait = gc_th->min_sleep_time;
}
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)

View File

@ -50,6 +50,12 @@ void read_inline_data(struct page *page, struct page *ipage)
SetPageUptodate(page);
}
static void truncate_inline_data(struct page *ipage)
{
f2fs_wait_on_page_writeback(ipage, NODE);
memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA);
}
int f2fs_read_inline_data(struct inode *inode, struct page *page)
{
struct page *ipage;
@ -79,7 +85,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
void *src_addr, *dst_addr;
block_t new_blk_addr;
struct f2fs_io_info fio = {
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
@ -115,9 +120,9 @@ no_update:
/* write data page to try to make data consistent */
set_page_writeback(page);
write_data_page(page, dn, &new_blk_addr, &fio);
update_extent_cache(new_blk_addr, dn);
fio.blk_addr = dn->data_blkaddr;
write_data_page(page, dn, &fio);
update_extent_cache(dn);
f2fs_wait_on_page_writeback(page, DATA);
if (dirty)
inode_dec_dirty_pages(dn->inode);
@ -126,7 +131,7 @@ no_update:
set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
truncate_inline_data(dn->inode_page, 0);
truncate_inline_data(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
@ -199,19 +204,6 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
return 0;
}
void truncate_inline_data(struct page *ipage, u64 from)
{
void *addr;
if (from >= MAX_INLINE_DATA)
return;
f2fs_wait_on_page_writeback(ipage, NODE);
addr = inline_data_addr(ipage);
memset(addr + from, 0, MAX_INLINE_DATA - from);
}
bool recover_inline_data(struct inode *inode, struct page *npage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@ -253,7 +245,7 @@ process_inline:
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
truncate_inline_data(ipage, 0);
truncate_inline_data(ipage);
f2fs_clear_inline_inode(inode);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
@ -371,7 +363,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
truncate_inline_data(ipage, 0);
truncate_inline_data(ipage);
stat_dec_inline_dir(dir);
clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);

View File

@ -67,29 +67,23 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
static int __recover_inline_status(struct inode *inode, struct page *ipage)
static void __recover_inline_status(struct inode *inode, struct page *ipage)
{
void *inline_data = inline_data_addr(ipage);
struct f2fs_inode *ri;
void *zbuf;
zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS);
if (!zbuf)
return -ENOMEM;
if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) {
kfree(zbuf);
return 0;
}
kfree(zbuf);
__le32 *start = inline_data;
__le32 *end = start + MAX_INLINE_DATA / sizeof(__le32);
while (start < end) {
if (*start++) {
f2fs_wait_on_page_writeback(ipage, NODE);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
ri = F2FS_INODE(ipage);
set_raw_inline(F2FS_I(inode), ri);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage));
set_page_dirty(ipage);
return 0;
return;
}
}
return;
}
static int do_read_inode(struct inode *inode)
@ -98,7 +92,6 @@ static int do_read_inode(struct inode *inode)
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_inode *ri;
int err = 0;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
@ -142,7 +135,7 @@ static int do_read_inode(struct inode *inode)
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
err = __recover_inline_status(inode, node_page);
__recover_inline_status(inode, node_page);
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
@ -152,7 +145,7 @@ static int do_read_inode(struct inode *inode)
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
return err;
return 0;
}
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
@ -304,7 +297,7 @@ void f2fs_evict_inode(struct inode *inode)
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
if (f2fs_is_atomic_file(inode))
commit_inmem_pages(inode, true);
trace_f2fs_evict_inode(inode);

View File

@ -299,7 +299,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
f2fs_lock_op(sbi);

View File

@ -19,6 +19,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>
#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
@ -57,12 +58,13 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
} else if (type == INO_ENTRIES) {
int i;
if (sbi->sb->s_bdi->dirty_exceeded)
return false;
for (i = 0; i <= UPDATE_INO; i++)
mem_size += (sbi->im[i].ino_num *
sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
if (sbi->sb->s_bdi->dirty_exceeded)
return false;
}
return res;
}
@ -268,7 +270,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
e = grab_nat_entry(nm_i, ni->nid);
e->ni = *ni;
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
/*
@ -276,7 +278,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
* previous nat entry can be remained in nat cache.
* So, reinitialize it with new information.
*/
e->ni = *ni;
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
}
@ -346,7 +348,6 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
struct nat_entry *e;
int i;
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
ni->nid = nid;
/* Check nat cache */
@ -361,6 +362,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
if (e)
return;
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
/* Check current segment summary */
mutex_lock(&curseg->curseg_mutex);
i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@ -471,7 +474,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct page *npage[4];
struct page *parent;
struct page *parent = NULL;
int offset[4];
unsigned int noffset[4];
nid_t nids[4];
@ -488,6 +491,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
if (IS_ERR(npage[0]))
return PTR_ERR(npage[0]);
}
/* if inline_data is set, should not report any block indices */
if (f2fs_has_inline_data(dn->inode) && index) {
err = -EINVAL;
f2fs_put_page(npage[0], 1);
goto release_out;
}
parent = npage[0];
if (level != 0)
nids[1] = get_nid(parent, offset[0], true);
@ -585,7 +596,7 @@ static void truncate_node(struct dnode_of_data *dn)
}
invalidate:
clear_node_page_dirty(dn->node_page);
F2FS_SET_SB_DIRT(sbi);
set_sbi_flag(sbi, SBI_IS_DIRTY);
f2fs_put_page(dn->node_page, 1);
@ -976,6 +987,10 @@ static int read_node_page(struct page *page, int rw)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct node_info ni;
struct f2fs_io_info fio = {
.type = NODE,
.rw = rw,
};
get_node_info(sbi, page->index, &ni);
@ -987,7 +1002,8 @@ static int read_node_page(struct page *page, int rw)
if (PageUptodate(page))
return LOCKED_PAGE;
return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
fio.blk_addr = ni.blk_addr;
return f2fs_submit_page_bio(sbi, page, &fio);
}
/*
@ -1028,11 +1044,11 @@ repeat:
err = read_node_page(page, READ_SYNC);
if (err < 0)
return ERR_PTR(err);
else if (err == LOCKED_PAGE)
goto got_it;
else if (err != LOCKED_PAGE)
lock_page(page);
if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
ClearPageUptodate(page);
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
@ -1040,7 +1056,6 @@ repeat:
f2fs_put_page(page, 1);
goto repeat;
}
got_it:
return page;
}
@ -1268,7 +1283,6 @@ static int f2fs_write_node_page(struct page *page,
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
block_t new_addr;
struct node_info ni;
struct f2fs_io_info fio = {
.type = NODE,
@ -1277,7 +1291,7 @@ static int f2fs_write_node_page(struct page *page,
trace_f2fs_writepage(page, NODE);
if (unlikely(sbi->por_doing))
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
@ -1303,9 +1317,11 @@ static int f2fs_write_node_page(struct page *page,
} else {
down_read(&sbi->node_write);
}
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
fio.blk_addr = ni.blk_addr;
write_node_page(sbi, page, nid, &fio);
set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
unlock_page(page);
@ -1355,26 +1371,12 @@ static int f2fs_set_node_page_dirty(struct page *page)
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
SetPagePrivate(page);
f2fs_trace_pid(page);
return 1;
}
return 0;
}
static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
unsigned int length)
{
struct inode *inode = page->mapping->host;
if (PageDirty(page))
dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES);
ClearPagePrivate(page);
}
static int f2fs_release_node_page(struct page *page, gfp_t wait)
{
ClearPagePrivate(page);
return 1;
}
/*
* Structure of the f2fs node operations
*/
@ -1382,8 +1384,8 @@ const struct address_space_operations f2fs_node_aops = {
.writepage = f2fs_write_node_page,
.writepages = f2fs_write_node_pages,
.set_page_dirty = f2fs_set_node_page_dirty,
.invalidatepage = f2fs_invalidate_node_page,
.releasepage = f2fs_release_node_page,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
};
static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
@ -1726,80 +1728,41 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
return 0;
}
/*
* ra_sum_pages() merge contiguous pages into one bio and submit.
* these pre-read pages are allocated in bd_inode's mapping tree.
*/
static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
int start, int nrpages)
{
struct inode *inode = sbi->sb->s_bdev->bd_inode;
struct address_space *mapping = inode->i_mapping;
int i, page_idx = start;
struct f2fs_io_info fio = {
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO
};
for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
/* alloc page in bd_inode for reading node summary info */
pages[i] = grab_cache_page(mapping, page_idx);
if (!pages[i])
break;
f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
}
f2fs_submit_merged_bio(sbi, META, READ);
return i;
}
int restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
struct inode *inode = sbi->sb->s_bdev->bd_inode;
block_t addr;
int bio_blocks = MAX_BIO_BLOCKS(sbi);
struct page *pages[bio_blocks];
int i, idx, last_offset, nrpages, err = 0;
int i, idx, last_offset, nrpages;
/* scan the node segment */
last_offset = sbi->blocks_per_seg;
addr = START_BLOCK(sbi, segno);
sum_entry = &sum->entries[0];
for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) {
for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
nrpages = min(last_offset - i, bio_blocks);
/* readahead node pages */
nrpages = ra_sum_pages(sbi, pages, addr, nrpages);
if (!nrpages)
return -ENOMEM;
ra_meta_pages(sbi, addr, nrpages, META_POR);
for (idx = 0; idx < nrpages; idx++) {
if (err)
goto skip;
for (idx = addr; idx < addr + nrpages; idx++) {
struct page *page = get_meta_page(sbi, idx);
lock_page(pages[idx]);
if (unlikely(!PageUptodate(pages[idx]))) {
err = -EIO;
} else {
rn = F2FS_NODE(pages[idx]);
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
sum_entry->ofs_in_node = 0;
sum_entry++;
}
unlock_page(pages[idx]);
skip:
page_cache_release(pages[idx]);
f2fs_put_page(page, 1);
}
invalidate_mapping_pages(inode->i_mapping, addr,
invalidate_mapping_pages(META_MAPPING(sbi), addr,
addr + nrpages);
}
return err;
return 0;
}
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@ -1923,7 +1886,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
struct nat_entry_set *setvec[NATVEC_SIZE];
struct nat_entry_set *setvec[SETVEC_SIZE];
struct nat_entry_set *set, *tmp;
unsigned int found;
nid_t set_idx = 0;
@ -1940,7 +1903,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, NATVEC_SIZE, setvec))) {
set_idx, SETVEC_SIZE, setvec))) {
unsigned idx;
set_idx = setvec[found - 1]->set + 1;
for (idx = 0; idx < found; idx++)
@ -2020,6 +1983,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next_i;
struct nat_entry *natvec[NATVEC_SIZE];
struct nat_entry_set *setvec[SETVEC_SIZE];
nid_t nid = 0;
unsigned int found;
@ -2044,11 +2008,27 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
for (idx = 0; idx < found; idx++)
__del_from_nat_cache(nm_i, natvec[idx]);
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
/* destroy nat set cache */
nid = 0;
while ((found = __gang_lookup_nat_set(nm_i,
nid, SETVEC_SIZE, setvec))) {
unsigned idx;
nid = setvec[found - 1]->set + 1;
for (idx = 0; idx < found; idx++) {
/* entry_cnt is not zero, when cp_error was occurred */
f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
kmem_cache_free(nat_entry_set_slab, setvec[idx]);
}
}
up_write(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);

View File

@ -25,10 +25,19 @@
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
#define SETVEC_SIZE 32
/* return value for read_node_page */
#define LOCKED_PAGE 1
/* For flag in struct node_info */
enum {
IS_CHECKPOINTED, /* is it checkpointed before? */
HAS_FSYNCED_INODE, /* is the inode fsynced before? */
HAS_LAST_FSYNC, /* has the latest node fsync mark? */
IS_DIRTY, /* this nat entry is dirty? */
};
/*
* For node information
*/
@ -37,18 +46,11 @@ struct node_info {
nid_t ino; /* inode number of the node's owner */
block_t blk_addr; /* block address of the node */
unsigned char version; /* version of the node */
};
enum {
IS_CHECKPOINTED, /* is it checkpointed before? */
HAS_FSYNCED_INODE, /* is the inode fsynced before? */
HAS_LAST_FSYNC, /* has the latest node fsync mark? */
IS_DIRTY, /* this nat entry is dirty? */
unsigned char flag; /* for node information bits */
};
struct nat_entry {
struct list_head list; /* for clean or dirty nat list */
unsigned char flag; /* for node information bits */
struct node_info ni; /* in-memory node information */
};
@ -63,20 +65,30 @@ struct nat_entry {
#define inc_node_version(version) (++version)
static inline void copy_node_info(struct node_info *dst,
struct node_info *src)
{
dst->nid = src->nid;
dst->ino = src->ino;
dst->blk_addr = src->blk_addr;
dst->version = src->version;
/* should not copy flag here */
}
static inline void set_nat_flag(struct nat_entry *ne,
unsigned int type, bool set)
{
unsigned char mask = 0x01 << type;
if (set)
ne->flag |= mask;
ne->ni.flag |= mask;
else
ne->flag &= ~mask;
ne->ni.flag &= ~mask;
}
static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
{
unsigned char mask = 0x01 << type;
return ne->flag & mask;
return ne->ni.flag & mask;
}
static inline void nat_reset_flag(struct nat_entry *ne)
@ -108,6 +120,7 @@ enum mem_type {
NAT_ENTRIES, /* indicates the cached nat entry */
DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */
BASE_CHECK, /* check kernel status */
};
struct nat_entry_set {
@ -200,11 +213,19 @@ static inline void fill_node_footer(struct page *page, nid_t nid,
nid_t ino, unsigned int ofs, bool reset)
{
struct f2fs_node *rn = F2FS_NODE(page);
unsigned int old_flag = 0;
if (reset)
memset(rn, 0, sizeof(*rn));
else
old_flag = le32_to_cpu(rn->footer.flag);
rn->footer.nid = cpu_to_le32(nid);
rn->footer.ino = cpu_to_le32(ino);
rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT);
/* should remain old flag bits such as COLD_BIT_SHIFT */
rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) |
(old_flag & OFFSET_BIT_MASK));
}
static inline void copy_node_footer(struct page *dst, struct page *src)

View File

@ -346,6 +346,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (IS_INODE(page)) {
recover_inline_xattr(inode, page);
} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
/*
* Deprecated; xattr blocks should be found from cold log.
* But, we should remain this for backward compatibility.
*/
recover_xattr_data(inode, page, blkaddr);
goto out;
}
@ -396,7 +400,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
/* write dummy data page */
recover_data_page(sbi, NULL, &sum, src, dest);
update_extent_cache(dest, &dn);
dn.data_blkaddr = dest;
update_extent_cache(&dn);
recovered++;
}
dn.ofs_in_node++;
@ -503,7 +508,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&inode_list);
/* step #1: find fsynced inode numbers */
sbi->por_doing = true;
set_sbi_flag(sbi, SBI_POR_DOING);
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
@ -536,7 +541,7 @@ out:
truncate_inode_pages_final(META_MAPPING(sbi));
}
sbi->por_doing = false;
clear_sbi_flag(sbi, SBI_POR_DOING);
if (err) {
discard_next_dnode(sbi, blkaddr);

View File

@ -20,6 +20,7 @@
#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "trace.h"
#include <trace/events/f2fs.h>
#define __reverse_ffz(x) __reverse_ffs(~(x))
@ -181,6 +182,7 @@ void register_inmem_page(struct inode *inode, struct page *page)
int err;
SetPagePrivate(page);
f2fs_trace_pid(page);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@ -205,23 +207,6 @@ retry:
mutex_unlock(&fi->inmem_lock);
}
void invalidate_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *cur;
mutex_lock(&fi->inmem_lock);
cur = radix_tree_lookup(&fi->inmem_root, page->index);
if (cur) {
radix_tree_delete(&fi->inmem_root, cur->page->index);
f2fs_put_page(cur->page, 0);
list_del(&cur->list);
kmem_cache_free(inmem_entry_slab, cur);
dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
}
mutex_unlock(&fi->inmem_lock);
}
void commit_inmem_pages(struct inode *inode, bool abort)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@ -230,7 +215,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
bool submit_bio = false;
struct f2fs_io_info fio = {
.type = DATA,
.rw = WRITE_SYNC,
.rw = WRITE_SYNC | REQ_PRIO,
};
/*
@ -240,33 +225,38 @@ void commit_inmem_pages(struct inode *inode, bool abort)
* Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
* inode becomes free by iget_locked in f2fs_iget.
*/
if (!abort)
if (!abort) {
f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
}
mutex_lock(&fi->inmem_lock);
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
if (!abort) {
lock_page(cur->page);
if (!abort && cur->page->mapping == inode->i_mapping) {
if (cur->page->mapping == inode->i_mapping) {
f2fs_wait_on_page_writeback(cur->page, DATA);
if (clear_page_dirty_for_io(cur->page))
inode_dec_dirty_pages(inode);
do_write_data_page(cur->page, &fio);
submit_bio = true;
}
radix_tree_delete(&fi->inmem_root, cur->page->index);
f2fs_put_page(cur->page, 1);
} else {
put_page(cur->page);
}
radix_tree_delete(&fi->inmem_root, cur->page->index);
list_del(&cur->list);
kmem_cache_free(inmem_entry_slab, cur);
dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
}
if (submit_bio)
f2fs_submit_merged_bio(sbi, DATA, WRITE);
mutex_unlock(&fi->inmem_lock);
filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
if (!abort) {
f2fs_unlock_op(sbi);
if (submit_bio)
f2fs_submit_merged_bio(sbi, DATA, WRITE);
}
}
/*
@ -290,7 +280,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
/* check the # of cached NAT entries and prefree segments */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
excess_prefree_segs(sbi) ||
available_free_memory(sbi, INO_ENTRIES))
!available_free_memory(sbi, INO_ENTRIES))
f2fs_sync_fs(sbi->sb, true);
}
@ -515,12 +505,13 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
unsigned long dmap[entries];
unsigned long *dmap = SIT_I(sbi)->tmp_map;
unsigned int start = 0, end = -1;
bool force = (cpc->reason == CP_DISCARD);
int i;
if (!force && !test_opt(sbi, DISCARD))
if (!force && (!test_opt(sbi, DISCARD) ||
SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards))
return;
if (force && !se->valid_blocks) {
@ -548,7 +539,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
for (i = 0; i < entries; i++)
dmap[i] = ~(cur_map[i] | ckpt_map[i]);
dmap[i] = force ? ~ckpt_map[i] :
(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
start = __find_rev_next_bit(dmap, max_blocks, end + 1);
@ -735,7 +727,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
/*
* Calculate the number of current summary pages for writing
*/
int npages_for_summary_flush(struct f2fs_sb_info *sbi)
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
int valid_sum_count = 0;
int i, sum_in_page;
@ -743,9 +735,14 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
if (sbi->ckpt->alloc_type[i] == SSR)
valid_sum_count += sbi->blocks_per_seg;
else {
if (for_ra)
valid_sum_count += le16_to_cpu(
F2FS_CKPT(sbi)->cur_data_blkoff[i]);
else
valid_sum_count += curseg_blkoff(sbi, i);
}
}
sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
SUM_FOOTER_SIZE) / SUMMARY_SIZE;
@ -803,7 +800,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
int go_left = 0;
int i;
write_lock(&free_i->segmap_lock);
spin_lock(&free_i->segmap_lock);
if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
segno = find_next_zero_bit(free_i->free_segmap,
@ -876,7 +873,7 @@ got_it:
f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
__set_inuse(sbi, segno);
*newseg = segno;
write_unlock(&free_i->segmap_lock);
spin_unlock(&free_i->segmap_lock);
}
static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
@ -927,7 +924,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
{
struct seg_entry *se = get_seg_entry(sbi, seg->segno);
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
unsigned long target_map[entries];
unsigned long *target_map = SIT_I(sbi)->tmp_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
int i, pos;
@ -1027,18 +1024,22 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
}
void allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
unsigned int old_curseg;
int i;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
curseg = CURSEG_I(sbi, i);
old_curseg = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
locate_dirty_segment(sbi, old_curseg);
}
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
__allocate_new_segments(sbi, i);
}
static const struct segment_allocation default_salloc_ops = {
@ -1047,8 +1048,8 @@ static const struct segment_allocation default_salloc_ops = {
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
__u64 start = range->start >> sbi->log_blocksize;
__u64 end = start + (range->len >> sbi->log_blocksize) - 1;
__u64 start = F2FS_BYTES_TO_BLK(range->start);
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
unsigned int start_segno, end_segno;
struct cp_control cpc;
@ -1065,16 +1066,21 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
cpc.reason = CP_DISCARD;
cpc.trim_start = start_segno;
cpc.trim_end = end_segno;
cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen);
/* do checkpoint to issue discard commands safely */
for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
cpc.trim_start = start_segno;
cpc.trim_end = min_t(unsigned int, rounddown(start_segno +
BATCHED_TRIM_SEGMENTS(sbi),
sbi->segs_per_sec) - 1, end_segno);
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
}
out:
range->len = cpc.trimmed << sbi->log_blocksize;
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
return 0;
}
@ -1151,11 +1157,18 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
bool direct_io = (type == CURSEG_DIRECT_IO);
type = direct_io ? CURSEG_WARM_DATA : type;
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
/* direct_io'ed data is aligned to the segment for better performance */
if (direct_io && curseg->next_blkoff)
__allocate_new_segments(sbi, type);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/*
@ -1187,39 +1200,39 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
}
static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, struct f2fs_io_info *fio)
struct f2fs_summary *sum,
struct f2fs_io_info *fio)
{
int type = __get_segment_type(page, fio->type);
allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type);
/* writeout dirty page into bdev */
f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
f2fs_submit_page_mbio(sbi, page, fio);
}
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
struct f2fs_io_info fio = {
.type = META,
.rw = WRITE_SYNC | REQ_META | REQ_PRIO
.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
.blk_addr = page->index,
};
set_page_writeback(page);
f2fs_submit_page_mbio(sbi, page, page->index, &fio);
f2fs_submit_page_mbio(sbi, page, &fio);
}
void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_io_info *fio,
unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
unsigned int nid, struct f2fs_io_info *fio)
{
struct f2fs_summary sum;
set_summary(&sum, nid, 0, 0);
do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
do_write_page(sbi, page, &sum, fio);
}
void write_data_page(struct page *page, struct dnode_of_data *dn,
block_t *new_blkaddr, struct f2fs_io_info *fio)
struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
@ -1228,14 +1241,14 @@ void write_data_page(struct page *page, struct dnode_of_data *dn,
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
do_write_page(sbi, page, &sum, fio);
dn->data_blkaddr = fio->blk_addr;
}
void rewrite_data_page(struct page *page, block_t old_blkaddr,
struct f2fs_io_info *fio)
void rewrite_data_page(struct page *page, struct f2fs_io_info *fio)
{
f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
stat_inc_inplace_blocks(F2FS_P_SB(page));
f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio);
}
void recover_data_page(struct f2fs_sb_info *sbi,
@ -1393,7 +1406,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
segno = le32_to_cpu(ckpt->cur_data_segno[type]);
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
CURSEG_HOT_DATA]);
if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
if (__exist_node_summaries(sbi))
blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
else
blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
@ -1402,7 +1415,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
CURSEG_HOT_NODE]);
blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
CURSEG_HOT_NODE]);
if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
if (__exist_node_summaries(sbi))
blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
type - CURSEG_HOT_NODE);
else
@ -1413,7 +1426,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
sum = (struct f2fs_summary_block *)page_address(new);
if (IS_NODESEG(type)) {
if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
if (__exist_node_summaries(sbi)) {
struct f2fs_summary *ns = &sum->entries[0];
int i;
for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
@ -1450,12 +1463,22 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
int err;
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
int npages = npages_for_summary_flush(sbi, true);
if (npages >= 2)
ra_meta_pages(sbi, start_sum_block(sbi), npages,
META_CP);
/* restore for compacted data summary */
if (read_compacted_summaries(sbi))
return -EINVAL;
type = CURSEG_HOT_NODE;
}
if (__exist_node_summaries(sbi))
ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
NR_CURSEG_TYPE - type, META_CP);
for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
if (err)
@ -1549,7 +1572,6 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}
@ -1754,7 +1776,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
se = get_seg_entry(sbi, segno);
/* add discard candidates */
if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
if (cpc->reason != CP_DISCARD) {
cpc->trim_start = segno;
add_discard_addrs(sbi, cpc);
}
@ -1833,6 +1855,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
}
sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->tmp_map)
return -ENOMEM;
if (sbi->segs_per_sec > 1) {
sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
sizeof(struct sec_entry));
@ -1897,7 +1923,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
free_i->free_segments = 0;
free_i->free_sections = 0;
rwlock_init(&free_i->segmap_lock);
spin_lock_init(&free_i->segmap_lock);
return 0;
}
@ -2110,6 +2136,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
INIT_LIST_HEAD(&sm_info->sit_entry_set);
if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
@ -2212,6 +2240,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kfree(sit_i->sentries[start].ckpt_valid_map);
}
}
kfree(sit_i->tmp_map);
vfree(sit_i->sentries);
vfree(sit_i->sec_entries);
kfree(sit_i->dirty_sentries_bitmap);

View File

@ -189,6 +189,7 @@ struct sit_info {
char *sit_bitmap; /* SIT bitmap pointer */
unsigned int bitmap_size; /* SIT bitmap size */
unsigned long *tmp_map; /* bitmap for temporal use */
unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
unsigned int dirty_sentries; /* # of dirty sentries */
unsigned int sents_per_block; /* # of SIT entries per block */
@ -207,7 +208,7 @@ struct free_segmap_info {
unsigned int start_segno; /* start segment number logically */
unsigned int free_segments; /* # of free segments */
unsigned int free_sections; /* # of free sections */
rwlock_t segmap_lock; /* free segmap lock */
spinlock_t segmap_lock; /* free segmap lock */
unsigned long *free_segmap; /* free segment bitmap */
unsigned long *free_secmap; /* free section bitmap */
};
@ -318,9 +319,9 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
unsigned int max, unsigned int segno)
{
unsigned int ret;
read_lock(&free_i->segmap_lock);
spin_lock(&free_i->segmap_lock);
ret = find_next_bit(free_i->free_segmap, max, segno);
read_unlock(&free_i->segmap_lock);
spin_unlock(&free_i->segmap_lock);
return ret;
}
@ -331,7 +332,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int next;
write_lock(&free_i->segmap_lock);
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
free_i->free_segments++;
@ -340,7 +341,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
write_unlock(&free_i->segmap_lock);
spin_unlock(&free_i->segmap_lock);
}
static inline void __set_inuse(struct f2fs_sb_info *sbi,
@ -362,7 +363,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int next;
write_lock(&free_i->segmap_lock);
spin_lock(&free_i->segmap_lock);
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
@ -373,7 +374,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_sections++;
}
}
write_unlock(&free_i->segmap_lock);
spin_unlock(&free_i->segmap_lock);
}
static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
@ -381,13 +382,13 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
write_lock(&free_i->segmap_lock);
spin_lock(&free_i->segmap_lock);
if (!test_and_set_bit(segno, free_i->free_segmap)) {
free_i->free_segments--;
if (!test_and_set_bit(secno, free_i->free_secmap))
free_i->free_sections--;
}
write_unlock(&free_i->segmap_lock);
spin_unlock(&free_i->segmap_lock);
}
static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
@ -460,7 +461,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
if (unlikely(sbi->por_doing))
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
@ -599,13 +600,13 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{
if (segno > TOTAL_SEGS(sbi) - 1)
sbi->need_fsck = true;
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
sbi->need_fsck = true;
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
/*
@ -616,11 +617,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
{
/* check segment usage */
if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
sbi->need_fsck = true;
set_sbi_flag(sbi, SBI_NEED_FSCK);
/* check boundary of a given segment number */
if (segno > TOTAL_SEGS(sbi) - 1)
sbi->need_fsck = true;
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
#endif

View File

@ -30,6 +30,7 @@
#include "segment.h"
#include "xattr.h"
#include "gc.h"
#include "trace.h"
#define CREATE_TRACE_POINTS
#include <trace/events/f2fs.h>
@ -41,6 +42,7 @@ static struct kset *f2fs_kset;
enum {
Opt_gc_background,
Opt_disable_roll_forward,
Opt_norecovery,
Opt_discard,
Opt_noheap,
Opt_user_xattr,
@ -61,6 +63,7 @@ enum {
static match_table_t f2fs_tokens = {
{Opt_gc_background, "background_gc=%s"},
{Opt_disable_roll_forward, "disable_roll_forward"},
{Opt_norecovery, "norecovery"},
{Opt_discard, "discard"},
{Opt_noheap, "no_heap"},
{Opt_user_xattr, "user_xattr"},
@ -192,6 +195,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
@ -207,6 +211,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_idle),
ATTR_LIST(reclaim_segments),
ATTR_LIST(max_small_discards),
ATTR_LIST(batched_trim_sections),
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
@ -286,6 +291,12 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_disable_roll_forward:
set_opt(sbi, DISABLE_ROLL_FORWARD);
break;
case Opt_norecovery:
/* this option mounts f2fs with ro */
set_opt(sbi, DISABLE_ROLL_FORWARD);
if (!f2fs_readonly(sb))
return -EINVAL;
break;
case Opt_discard:
set_opt(sbi, DISCARD);
break;
@ -446,8 +457,13 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_stats(sbi);
stop_gc_thread(sbi);
/* We don't need to do checkpoint when it's clean */
if (sbi->s_dirty) {
/*
* We don't need to do checkpoint when superblock is clean.
* But, the previous checkpoint was not done by umount, it needs to do
* clean checkpoint again.
*/
if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) {
struct cp_control cpc = {
.reason = CP_UMOUNT,
};
@ -486,13 +502,15 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
if (sync) {
struct cp_control cpc;
cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
cpc.reason = __get_cp_reason(sbi);
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
} else {
f2fs_balance_fs(sbi);
}
f2fs_trace_ios(NULL, NULL, 1);
return 0;
}
@ -887,7 +905,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
atomic_set(&sbi->nr_pages[i], 0);
sbi->dir_level = DEF_DIR_LEVEL;
sbi->need_fsck = false;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
}
/*
@ -942,6 +960,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root;
long err = -EINVAL;
bool retry = true;
char *options = NULL;
int i;
try_onemore:
@ -973,9 +992,15 @@ try_onemore:
set_opt(sbi, POSIX_ACL);
#endif
/* parse mount options */
err = parse_options(sb, (char *)data);
if (err)
options = kstrdup((const char *)data, GFP_KERNEL);
if (data && !options) {
err = -ENOMEM;
goto free_sb_buf;
}
err = parse_options(sb, options);
if (err)
goto free_options;
sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
sb->s_max_links = F2FS_LINK_MAX;
@ -998,7 +1023,7 @@ try_onemore:
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
sbi->por_doing = false;
clear_sbi_flag(sbi, SBI_POR_DOING);
spin_lock_init(&sbi->stat_lock);
init_rwsem(&sbi->read_io.io_rwsem);
@ -1019,7 +1044,7 @@ try_onemore:
if (IS_ERR(sbi->meta_inode)) {
f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
goto free_sb_buf;
goto free_options;
}
err = get_valid_checkpoint(sbi);
@ -1122,10 +1147,19 @@ try_onemore:
goto free_proc;
if (!retry)
sbi->need_fsck = true;
set_sbi_flag(sbi, SBI_NEED_FSCK);
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
/*
* mount should be failed, when device has readonly mode, and
* previous checkpoint was not done by clean system shutdown.
*/
if (bdev_read_only(sb->s_bdev) &&
!is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) {
err = -EROFS;
goto free_kobj;
}
err = recover_fsync_data(sbi);
if (err) {
f2fs_msg(sb, KERN_ERR,
@ -1144,6 +1178,7 @@ try_onemore:
if (err)
goto free_kobj;
}
kfree(options);
return 0;
free_kobj:
@ -1168,6 +1203,8 @@ free_cp:
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
free_options:
kfree(options);
free_sb_buf:
brelse(raw_super_buf);
free_sbi:
@ -1188,11 +1225,18 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
}
static void kill_f2fs_super(struct super_block *sb)
{
if (sb->s_root)
set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
kill_block_super(sb);
}
static struct file_system_type f2fs_fs_type = {
.owner = THIS_MODULE,
.name = "f2fs",
.mount = f2fs_mount,
.kill_sb = kill_block_super,
.kill_sb = kill_f2fs_super,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("f2fs");
@ -1220,6 +1264,8 @@ static int __init init_f2fs_fs(void)
{
int err;
f2fs_build_trace_ios();
err = init_inodecache();
if (err)
goto fail;
@ -1229,12 +1275,9 @@ static int __init init_f2fs_fs(void)
err = create_segment_manager_caches();
if (err)
goto free_node_manager_caches;
err = create_gc_caches();
if (err)
goto free_segment_manager_caches;
err = create_checkpoint_caches();
if (err)
goto free_gc_caches;
goto free_segment_manager_caches;
f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
if (!f2fs_kset) {
err = -ENOMEM;
@ -1251,8 +1294,6 @@ free_kset:
kset_unregister(f2fs_kset);
free_checkpoint_caches:
destroy_checkpoint_caches();
free_gc_caches:
destroy_gc_caches();
free_segment_manager_caches:
destroy_segment_manager_caches();
free_node_manager_caches:
@ -1269,11 +1310,11 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
destroy_checkpoint_caches();
destroy_gc_caches();
destroy_segment_manager_caches();
destroy_node_manager_caches();
destroy_inodecache();
kset_unregister(f2fs_kset);
f2fs_destroy_trace_ios();
}
module_init(init_f2fs_fs)

159
fs/f2fs/trace.c Normal file
View File

@ -0,0 +1,159 @@
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched.h>
#include <linux/radix-tree.h>
#include "f2fs.h"
#include "trace.h"
static RADIX_TREE(pids, GFP_ATOMIC);
static spinlock_t pids_lock;
static struct last_io_info last_io;
static inline void __print_last_io(void)
{
if (!last_io.len)
return;
trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n",
last_io.major, last_io.minor,
last_io.pid, "----------------",
last_io.type,
last_io.fio.rw, last_io.fio.blk_addr,
last_io.len);
memset(&last_io, 0, sizeof(last_io));
}
static int __file_type(struct inode *inode, pid_t pid)
{
if (f2fs_is_atomic_file(inode))
return __ATOMIC_FILE;
else if (f2fs_is_volatile_file(inode))
return __VOLATILE_FILE;
else if (S_ISDIR(inode->i_mode))
return __DIR_FILE;
else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode)))
return __NODE_FILE;
else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode)))
return __META_FILE;
else if (pid)
return __NORMAL_FILE;
else
return __MISC_FILE;
}
void f2fs_trace_pid(struct page *page)
{
struct inode *inode = page->mapping->host;
pid_t pid = task_pid_nr(current);
void *p;
page->private = pid;
if (radix_tree_preload(GFP_NOFS))
return;
spin_lock(&pids_lock);
p = radix_tree_lookup(&pids, pid);
if (p == current)
goto out;
if (p)
radix_tree_delete(&pids, pid);
f2fs_radix_tree_insert(&pids, pid, current);
trace_printk("%3x:%3x %4x %-16s\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
pid, current->comm);
out:
spin_unlock(&pids_lock);
radix_tree_preload_end();
}
void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush)
{
struct inode *inode;
pid_t pid;
int major, minor;
if (flush) {
__print_last_io();
return;
}
inode = page->mapping->host;
pid = page_private(page);
major = MAJOR(inode->i_sb->s_dev);
minor = MINOR(inode->i_sb->s_dev);
if (last_io.major == major && last_io.minor == minor &&
last_io.pid == pid &&
last_io.type == __file_type(inode, pid) &&
last_io.fio.rw == fio->rw &&
last_io.fio.blk_addr + last_io.len == fio->blk_addr) {
last_io.len++;
return;
}
__print_last_io();
last_io.major = major;
last_io.minor = minor;
last_io.pid = pid;
last_io.type = __file_type(inode, pid);
last_io.fio = *fio;
last_io.len = 1;
return;
}
void f2fs_build_trace_ios(void)
{
spin_lock_init(&pids_lock);
}
#define PIDVEC_SIZE 128
static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
unsigned int max_items)
{
struct radix_tree_iter iter;
void **slot;
unsigned int ret = 0;
if (unlikely(!max_items))
return 0;
radix_tree_for_each_slot(slot, &pids, &iter, first_index) {
results[ret] = iter.index;
if (++ret == PIDVEC_SIZE)
break;
}
return ret;
}
void f2fs_destroy_trace_ios(void)
{
pid_t pid[PIDVEC_SIZE];
pid_t next_pid = 0;
unsigned int found;
spin_lock(&pids_lock);
while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
unsigned idx;
next_pid = pid[found - 1] + 1;
for (idx = 0; idx < found; idx++)
radix_tree_delete(&pids, pid[idx]);
}
spin_unlock(&pids_lock);
}

46
fs/f2fs/trace.h Normal file
View File

@ -0,0 +1,46 @@
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __F2FS_TRACE_H__
#define __F2FS_TRACE_H__
#ifdef CONFIG_F2FS_IO_TRACE
#include <trace/events/f2fs.h>
enum file_type {
__NORMAL_FILE,
__DIR_FILE,
__NODE_FILE,
__META_FILE,
__ATOMIC_FILE,
__VOLATILE_FILE,
__MISC_FILE,
};
struct last_io_info {
int major, minor;
pid_t pid;
enum file_type type;
struct f2fs_io_info fio;
block_t len;
};
extern void f2fs_trace_pid(struct page *);
extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int);
extern void f2fs_build_trace_ios(void);
extern void f2fs_destroy_trace_ios(void);
#else
#define f2fs_trace_pid(p)
#define f2fs_trace_ios(p, i, n)
#define f2fs_build_trace_ios()
#define f2fs_destroy_trace_ios()
#endif
#endif /* __F2FS_TRACE_H__ */

View File

@ -19,12 +19,16 @@
#define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */
#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */
#define F2FS_BLKSIZE 4096 /* support only 4KB block */
#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */
#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
#define NULL_ADDR ((block_t)0) /* used as block_t addresses */
#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */
#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS)
#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS)
/* 0, 1(node nid), 2(meta nid) are reserved node id */
#define F2FS_RESERVED_NODE_NUM 3
@ -87,6 +91,7 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
#define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010
#define CP_ERROR_FLAG 0x00000008
#define CP_COMPACT_SUM_FLAG 0x00000004
@ -224,6 +229,8 @@ enum {
OFFSET_BIT_SHIFT
};
#define OFFSET_BIT_MASK (0x07) /* (0x01 << OFFSET_BIT_SHIFT) - 1 */
struct node_footer {
__le32 nid; /* node id */
__le32 ino; /* inode nunmber */

View File

@ -72,6 +72,7 @@
#define show_cpreason(type) \
__print_symbolic(type, \
{ CP_UMOUNT, "Umount" }, \
{ CP_FASTBOOT, "Fastboot" }, \
{ CP_SYNC, "Sync" }, \
{ CP_DISCARD, "Discard" })
@ -148,14 +149,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter,
TRACE_EVENT(f2fs_sync_file_exit,
TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret),
TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret),
TP_ARGS(inode, need_cp, datasync, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(bool, need_cp)
__field(int, need_cp)
__field(int, datasync)
__field(int, ret)
),
@ -190,7 +191,7 @@ TRACE_EVENT(f2fs_sync_fs,
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dirty = F2FS_SB(sb)->s_dirty;
__entry->dirty = is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY);
__entry->wait = wait;
),
@ -440,38 +441,6 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,
__entry->err)
);
TRACE_EVENT_CONDITION(f2fs_submit_page_bio,
TP_PROTO(struct page *page, sector_t blkaddr, int type),
TP_ARGS(page, blkaddr, type),
TP_CONDITION(page->mapping),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(pgoff_t, index)
__field(sector_t, blkaddr)
__field(int, type)
),
TP_fast_assign(
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->ino = page->mapping->host->i_ino;
__entry->index = page->index;
__entry->blkaddr = blkaddr;
__entry->type = type;
),
TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
"blkaddr = 0x%llx, bio_type = %s%s",
show_dev_ino(__entry),
(unsigned long)__entry->index,
(unsigned long long)__entry->blkaddr,
show_bio_type(__entry->type))
);
TRACE_EVENT(f2fs_get_data_block,
TP_PROTO(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int ret),
@ -680,11 +649,63 @@ TRACE_EVENT(f2fs_reserve_new_block,
__entry->ofs_in_node)
);
DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
TP_PROTO(struct page *page, struct f2fs_io_info *fio),
TP_ARGS(page, fio),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(pgoff_t, index)
__field(block_t, blkaddr)
__field(int, rw)
__field(int, type)
),
TP_fast_assign(
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->ino = page->mapping->host->i_ino;
__entry->index = page->index;
__entry->blkaddr = fio->blk_addr;
__entry->rw = fio->rw;
__entry->type = fio->type;
),
TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
"blkaddr = 0x%llx, rw = %s%s, type = %s",
show_dev_ino(__entry),
(unsigned long)__entry->index,
(unsigned long long)__entry->blkaddr,
show_bio_type(__entry->rw),
show_block_type(__entry->type))
);
DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio,
TP_PROTO(struct page *page, struct f2fs_io_info *fio),
TP_ARGS(page, fio),
TP_CONDITION(page->mapping)
);
DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
TP_PROTO(struct page *page, struct f2fs_io_info *fio),
TP_ARGS(page, fio),
TP_CONDITION(page->mapping)
);
DECLARE_EVENT_CLASS(f2fs__submit_bio,
TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
struct bio *bio),
TP_ARGS(sb, rw, type, bio),
TP_ARGS(sb, fio, bio),
TP_STRUCT__entry(
__field(dev_t, dev)
@ -696,8 +717,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->rw = rw;
__entry->type = type;
__entry->rw = fio->rw;
__entry->type = fio->type;
__entry->sector = bio->bi_iter.bi_sector;
__entry->size = bio->bi_iter.bi_size;
),
@ -712,18 +733,20 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio,
TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
struct bio *bio),
TP_ARGS(sb, rw, type, bio),
TP_ARGS(sb, fio, bio),
TP_CONDITION(bio)
);
DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
struct bio *bio),
TP_ARGS(sb, rw, type, bio),
TP_ARGS(sb, fio, bio),
TP_CONDITION(bio)
);
@ -916,38 +939,6 @@ TRACE_EVENT(f2fs_writepages,
__entry->for_sync)
);
TRACE_EVENT(f2fs_submit_page_mbio,
TP_PROTO(struct page *page, int rw, int type, block_t blk_addr),
TP_ARGS(page, rw, type, blk_addr),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(int, rw)
__field(int, type)
__field(pgoff_t, index)
__field(block_t, block)
),
TP_fast_assign(
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->ino = page->mapping->host->i_ino;
__entry->rw = rw;
__entry->type = type;
__entry->index = page->index;
__entry->block = blk_addr;
),
TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx",
show_dev_ino(__entry),
show_bio_type(__entry->rw),
show_block_type(__entry->type),
(unsigned long)__entry->index,
(unsigned long long)__entry->block)
);
TRACE_EVENT(f2fs_write_checkpoint,
TP_PROTO(struct super_block *sb, int reason, char *msg),
@ -998,14 +989,15 @@ TRACE_EVENT(f2fs_issue_discard,
TRACE_EVENT(f2fs_issue_flush,
TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge),
TP_PROTO(struct super_block *sb, unsigned int nobarrier,
unsigned int flush_merge),
TP_ARGS(sb, nobarrier, flush_merge),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(bool, nobarrier)
__field(bool, flush_merge)
__field(unsigned int, nobarrier)
__field(unsigned int, flush_merge)
),
TP_fast_assign(