From cd52b6368f1301b55d0e484105c876930e443d83 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 Dec 2014 15:18:34 -0800 Subject: [PATCH 01/64] f2fs: remove checking dirty_exceed We don't need to force to write dirty_exceeded for f2fs_balance_fs_bg. This flag was only meaningful to write bypassing conditions. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f83326ca32ef..d6f073edd861 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -57,8 +57,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) } else if (type == INO_ENTRIES) { int i; - if (sbi->sb->s_bdi->dirty_exceeded) - return false; for (i = 0; i <= UPDATE_INO; i++) mem_size += (sbi->im[i].ino_num * sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; From 88a70a69c088933011615fe26242e0335b012284 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 Dec 2014 15:20:48 -0800 Subject: [PATCH 02/64] f2fs: fix wrong condition check to trigger f2fs_sync_fs If there is not enough available memory, we need to trigger f2fs_sync_fs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 42607a679923..11e4b5c14616 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -290,7 +290,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || excess_prefree_segs(sbi) || - available_free_memory(sbi, INO_ENTRIES)) + !available_free_memory(sbi, INO_ENTRIES)) f2fs_sync_fs(sbi->sb, true); } From 70c640b1d6fd8484d0629b90052d6f6f738023fb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 10 Dec 2014 13:59:33 -0800 Subject: [PATCH 03/64] f2fs: don't need to call lock_op and lock_page for abort We don't need to call lock_op and lock_page at the aborting path. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 11e4b5c14616..3ce86c533604 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -240,33 +240,38 @@ void commit_inmem_pages(struct inode *inode, bool abort) * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this * inode becomes free by iget_locked in f2fs_iget. */ - if (!abort) + if (!abort) { f2fs_balance_fs(sbi); - - f2fs_lock_op(sbi); + f2fs_lock_op(sbi); + } mutex_lock(&fi->inmem_lock); list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { - lock_page(cur->page); - if (!abort && cur->page->mapping == inode->i_mapping) { - f2fs_wait_on_page_writeback(cur->page, DATA); - if (clear_page_dirty_for_io(cur->page)) - inode_dec_dirty_pages(inode); - do_write_data_page(cur->page, &fio); - submit_bio = true; + if (!abort) { + lock_page(cur->page); + if (cur->page->mapping == inode->i_mapping) { + f2fs_wait_on_page_writeback(cur->page, DATA); + if (clear_page_dirty_for_io(cur->page)) + inode_dec_dirty_pages(inode); + do_write_data_page(cur->page, &fio); + submit_bio = true; + } + f2fs_put_page(cur->page, 1); + } else { + put_page(cur->page); } radix_tree_delete(&fi->inmem_root, cur->page->index); - f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } - if (submit_bio) - f2fs_submit_merged_bio(sbi, DATA, WRITE); mutex_unlock(&fi->inmem_lock); - filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); - f2fs_unlock_op(sbi); + if (!abort) { + f2fs_unlock_op(sbi); + if (submit_bio) + f2fs_submit_merged_bio(sbi, DATA, WRITE); + } } /* From 1e84371ffeef451e8532e0cd04c2fe59ff10c514 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 9 Dec 2014 06:08:59 -0800 Subject: [PATCH 04/64] f2fs: change atomic and volatile write policies This patch adds two new ioctls to release inmemory pages grabbed by atomic writes. o f2fs_ioc_abort_volatile_write - If transaction was failed, all the grabbed pages and data should be written. o f2fs_ioc_release_volatile_write - This is to enhance the performance of PERSIST mode in sqlite. In order to avoid huge memory consumption which causes OOM, this patch changes volatile writes to use normal dirty pages, instead blocked flushing to the disk as long as system does not suffer from memory pressure. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 ++++-- fs/f2fs/f2fs.h | 8 +++++ fs/f2fs/file.c | 77 +++++++++++++++++++++++++++++++++++++++++------ fs/f2fs/inode.c | 2 +- fs/f2fs/node.c | 3 ++ fs/f2fs/node.h | 1 + fs/f2fs/segment.c | 2 +- 7 files changed, 88 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7ec697b37f19..32264e3d524f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -814,6 +814,11 @@ static int f2fs_write_data_page(struct page *page, write: if (unlikely(sbi->por_doing)) goto redirty_out; + if (f2fs_is_drop_cache(inode)) + goto out; + if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && + available_free_memory(sbi, BASE_CHECK)) + goto redirty_out; /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { @@ -1109,7 +1114,7 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) return; - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + if (f2fs_is_atomic_file(inode)) invalidate_inmem_page(inode, page); if (PageDirty(page)) @@ -1132,7 +1137,7 @@ static int f2fs_set_data_page_dirty(struct page *page) SetPageUptodate(page); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { + if (f2fs_is_atomic_file(inode)) { register_inmem_page(inode, page); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ec58bb2373fc..8c9bf3d6cb7d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -201,6 +201,8 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #if defined(__KERNEL__) && defined(CONFIG_COMPAT) /* @@ -1113,6 +1115,7 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ }; @@ -1220,6 +1223,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); } +static inline bool f2fs_is_drop_cache(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); +} + static inline void *inline_data_addr(struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3c27e0ecb3bc..5139f90e92c1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -836,6 +836,19 @@ static long f2fs_fallocate(struct file *file, int mode, return ret; } +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode)) + commit_inmem_pages(inode, true); + if (f2fs_is_volatile_file(inode)) { + set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + filemap_fdatawrite(inode->i_mapping); + clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + } + return 0; +} + #define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) #define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) @@ -909,26 +922,20 @@ out: static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (!inode_owner_or_capable(inode)) return -EACCES; - f2fs_balance_fs(sbi); + f2fs_balance_fs(F2FS_I_SB(inode)); + + if (f2fs_is_atomic_file(inode)) + return 0; set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return f2fs_convert_inline_inode(inode); } -static int f2fs_release_file(struct inode *inode, struct file *filp) -{ - /* some remained atomic pages should discarded */ - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - commit_inmem_pages(inode, true); - return 0; -} - static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); @@ -949,6 +956,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); mnt_drop_write_file(filp); + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); return ret; } @@ -959,11 +967,56 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (f2fs_is_volatile_file(inode)) + return 0; + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); return f2fs_convert_inline_inode(inode); } +static int f2fs_ioc_release_volatile_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (!f2fs_is_volatile_file(inode)) + return 0; + + punch_hole(inode, 0, F2FS_BLKSIZE); + return 0; +} + +static int f2fs_ioc_abort_volatile_write(struct file *filp) +{ + struct inode *inode = file_inode(filp); + int ret; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + f2fs_balance_fs(F2FS_I_SB(inode)); + + if (f2fs_is_atomic_file(inode)) { + commit_inmem_pages(inode, false); + clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + } + + if (f2fs_is_volatile_file(inode)) { + clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + filemap_fdatawrite(inode->i_mapping); + set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + } + mnt_drop_write_file(filp); + return ret; +} + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1007,6 +1060,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_commit_atomic_write(filp); case F2FS_IOC_START_VOLATILE_WRITE: return f2fs_ioc_start_volatile_write(filp); + case F2FS_IOC_RELEASE_VOLATILE_WRITE: + return f2fs_ioc_release_volatile_write(filp); + case F2FS_IOC_ABORT_VOLATILE_WRITE: + return f2fs_ioc_abort_volatile_write(filp); case FITRIM: return f2fs_ioc_fitrim(filp, arg); default: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 196cc7843aaf..3a8958d1684f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -304,7 +304,7 @@ void f2fs_evict_inode(struct inode *inode) nid_t xnid = F2FS_I(inode)->i_xattr_nid; /* some remained atomic pages should discarded */ - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + if (f2fs_is_atomic_file(inode)) commit_inmem_pages(inode, true); trace_f2fs_evict_inode(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d6f073edd861..cabecee88377 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -61,6 +61,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size += (sbi->im[i].ino_num * sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else { + if (sbi->sb->s_bdi->dirty_exceeded) + return false; } return res; } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index d10b6448a671..036b887176ff 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -108,6 +108,7 @@ enum mem_type { NAT_ENTRIES, /* indicates the cached nat entry */ DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ + BASE_CHECK, /* check kernel status */ }; struct nat_entry_set { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3ce86c533604..de9c0700c88e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -230,7 +230,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) bool submit_bio = false; struct f2fs_io_info fio = { .type = DATA, - .rw = WRITE_SYNC, + .rw = WRITE_SYNC | REQ_PRIO, }; /* From d7bc2484b8d4e580370c66ad93c4319225bd104d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 12 Dec 2014 13:53:41 -0800 Subject: [PATCH 05/64] f2fs: fix small discards not to issue redundantly The ckpt_valid_map and cur_valid_map are synced by seg_info_to_raw_sit. In the case of small discards, the candidates are selected before sync, while fitrim selects candidates after sync. So, for small discards, we need to add candidates only just being obsoleted. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index de9c0700c88e..335418c9a06b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -525,7 +525,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool force = (cpc->reason == CP_DISCARD); int i; - if (!force && !test_opt(sbi, DISCARD)) + if (!force && (!test_opt(sbi, DISCARD) || + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)) return; if (force && !se->valid_blocks) { @@ -553,7 +554,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = ~(cur_map[i] | ckpt_map[i]); + dmap[i] = force ? ~ckpt_map[i] : + (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); @@ -1759,7 +1761,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) se = get_seg_entry(sbi, segno); /* add discard candidates */ - if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { + if (cpc->reason != CP_DISCARD) { cpc->trim_start = segno; add_discard_addrs(sbi, cpc); } From 042b7816aaebb1eb137b9889c20b595d951d15b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 12 Dec 2014 19:40:02 -0800 Subject: [PATCH 06/64] f2fs: remove unnecessary call to invalidate inmemory pages Now we use inmemory pages for atomic write only and provide abort procedure, we don't need to truncate them explicitly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 --- fs/f2fs/f2fs.h | 1 - fs/f2fs/segment.c | 17 ----------------- 3 files changed, 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 32264e3d524f..caa08e46697a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1114,9 +1114,6 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) return; - if (f2fs_is_atomic_file(inode)) - invalidate_inmem_page(inode, page); - if (PageDirty(page)) inode_dec_dirty_pages(inode); ClearPagePrivate(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8c9bf3d6cb7d..d3699da17e6b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1397,7 +1397,6 @@ void destroy_node_manager_caches(void); * segment.c */ void register_inmem_page(struct inode *, struct page *); -void invalidate_inmem_page(struct inode *, struct page *); void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 335418c9a06b..3791fa93dc7b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -205,23 +205,6 @@ retry: mutex_unlock(&fi->inmem_lock); } -void invalidate_inmem_page(struct inode *inode, struct page *page) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - struct inmem_pages *cur; - - mutex_lock(&fi->inmem_lock); - cur = radix_tree_lookup(&fi->inmem_root, page->index); - if (cur) { - radix_tree_delete(&fi->inmem_root, cur->page->index); - f2fs_put_page(cur->page, 0); - list_del(&cur->list); - kmem_cache_free(inmem_entry_slab, cur); - dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); - } - mutex_unlock(&fi->inmem_lock); -} - void commit_inmem_pages(struct inode *inode, bool abort) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); From 5df1f1da7a148c4a14d035b49c4d89790f59a57a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 13 Dec 2014 13:13:11 -0800 Subject: [PATCH 07/64] f2fs: use missing the use of f2fs_kunmap_page This patch calls f2fs_kunmap_page which I missed before. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b1a7d5737cd0..b74097a7f6d9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -286,8 +286,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); From 3fa06d7bc9f579bd180e879fd1c9bdb6b1b0d9b7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 9 Dec 2014 14:21:46 +0800 Subject: [PATCH 08/64] f2fs: readahead contiguous current summary blocks in checkpoint Let's add readahead code for reading contiguous compact/normal summary blocks in checkpoint, then we will gain better performance in mount procedure. Changes from v1 o remove inappropriate 'unlikely' in npages_for_summary_flush. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 21 ++++++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e6c271fefaca..825158e4855a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -922,7 +922,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->next_free_nid = cpu_to_le32(last_nid); /* 2 cp + n data seg summary + orphan inode blocks */ - data_sum_blocks = npages_for_summary_flush(sbi); + data_sum_blocks = npages_for_summary_flush(sbi, false); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d3699da17e6b..a8ccbceb869a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1408,7 +1408,7 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); void release_discard_addrs(struct f2fs_sb_info *); void discard_next_dnode(struct f2fs_sb_info *, block_t); -int npages_for_summary_flush(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *, bool); void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3791fa93dc7b..c950c9318e8e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -725,7 +725,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, /* * Calculate the number of current summary pages for writing */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi) +int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { int valid_sum_count = 0; int i, sum_in_page; @@ -733,8 +733,13 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { if (sbi->ckpt->alloc_type[i] == SSR) valid_sum_count += sbi->blocks_per_seg; - else - valid_sum_count += curseg_blkoff(sbi, i); + else { + if (for_ra) + valid_sum_count += le16_to_cpu( + F2FS_CKPT(sbi)->cur_data_blkoff[i]); + else + valid_sum_count += curseg_blkoff(sbi, i); + } } sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - @@ -1440,12 +1445,22 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int err; if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + int npages = npages_for_summary_flush(sbi, true); + + if (npages >= 2) + ra_meta_pages(sbi, start_sum_block(sbi), npages, + META_CP); + /* restore for compacted data summary */ if (read_compacted_summaries(sbi)) return -EINVAL; type = CURSEG_HOT_NODE; } + if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), + NR_CURSEG_TYPE - type, META_CP); + for (; type <= CURSEG_COLD_NODE; type++) { err = read_normal_summaries(sbi, type); if (err) From 5c27f4ee447b4ef1cd88d5313eeb838c56265571 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Dec 2014 17:37:21 +0800 Subject: [PATCH 09/64] f2fs: merge two uchar variable in struct node_info to reduce memory cost This patch moves one member of struct nat_entry: _flag_ to struct node_info, so _version_ in struct node_info and _flag_ which are unsigned char type will merge to one 32-bit space in register/memory. So the size of nat_entry will be reduced from 28 bytes to 24 bytes (for 64-bit machine, reduce its size from 40 bytes to 32 bytes) and then slab memory using by f2fs will be reduced. changes from v2: o update description of memory usage gain for 64-bit machine suggested by Changman Lee. changes from v1: o introduce inline copy_node_info() to copy valid data from node info suggested by Jaegeuk Kim, it can avoid bug. Reviewed-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- fs/f2fs/node.h | 33 ++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cabecee88377..e565b9638971 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -269,7 +269,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - e->ni = *ni; + copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { /* @@ -277,7 +277,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, * previous nat entry can be remained in nat cache. * So, reinitialize it with new information. */ - e->ni = *ni; + copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 036b887176ff..fa6f95968b42 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -29,6 +29,14 @@ /* return value for read_node_page */ #define LOCKED_PAGE 1 +/* For flag in struct node_info */ +enum { + IS_CHECKPOINTED, /* is it checkpointed before? */ + HAS_FSYNCED_INODE, /* is the inode fsynced before? */ + HAS_LAST_FSYNC, /* has the latest node fsync mark? */ + IS_DIRTY, /* this nat entry is dirty? */ +}; + /* * For node information */ @@ -37,18 +45,11 @@ struct node_info { nid_t ino; /* inode number of the node's owner */ block_t blk_addr; /* block address of the node */ unsigned char version; /* version of the node */ -}; - -enum { - IS_CHECKPOINTED, /* is it checkpointed before? */ - HAS_FSYNCED_INODE, /* is the inode fsynced before? */ - HAS_LAST_FSYNC, /* has the latest node fsync mark? */ - IS_DIRTY, /* this nat entry is dirty? */ + unsigned char flag; /* for node information bits */ }; struct nat_entry { struct list_head list; /* for clean or dirty nat list */ - unsigned char flag; /* for node information bits */ struct node_info ni; /* in-memory node information */ }; @@ -63,20 +64,30 @@ struct nat_entry { #define inc_node_version(version) (++version) +static inline void copy_node_info(struct node_info *dst, + struct node_info *src) +{ + dst->nid = src->nid; + dst->ino = src->ino; + dst->blk_addr = src->blk_addr; + dst->version = src->version; + /* should not copy flag here */ +} + static inline void set_nat_flag(struct nat_entry *ne, unsigned int type, bool set) { unsigned char mask = 0x01 << type; if (set) - ne->flag |= mask; + ne->ni.flag |= mask; else - ne->flag &= ~mask; + ne->ni.flag &= ~mask; } static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) { unsigned char mask = 0x01 << type; - return ne->flag & mask; + return ne->ni.flag & mask; } static inline void nat_reset_flag(struct nat_entry *ne) From 9ecf4b80bd32ad727d7fca56706bb5b059935c18 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Dec 2014 18:29:05 +0800 Subject: [PATCH 10/64] f2fs: use ra_meta_pages to simplify readahead code in restore_node_summary Use more common function ra_meta_pages() with META_POR to readahead node blocks in restore_node_summary() instead of ra_sum_pages(), hence we can simplify the readahead code there, and also we can remove unused function ra_sum_pages(). changes from v2: o use invalidate_mapping_pages as before suggested by Changman Lee. changes from v1: o fix one bug when using truncate_inode_pages_range which is pointed out by Jaegeuk Kim. Reviewed-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 65 ++++++++++---------------------------------------- 1 file changed, 13 insertions(+), 52 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e565b9638971..bcfd67c80196 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1727,80 +1727,41 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) return 0; } -/* - * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-read pages are allocated in bd_inode's mapping tree. - */ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, - int start, int nrpages) -{ - struct inode *inode = sbi->sb->s_bdev->bd_inode; - struct address_space *mapping = inode->i_mapping; - int i, page_idx = start; - struct f2fs_io_info fio = { - .type = META, - .rw = READ_SYNC | REQ_META | REQ_PRIO - }; - - for (i = 0; page_idx < start + nrpages; page_idx++, i++) { - /* alloc page in bd_inode for reading node summary info */ - pages[i] = grab_cache_page(mapping, page_idx); - if (!pages[i]) - break; - f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); - } - - f2fs_submit_merged_bio(sbi, META, READ); - return i; -} - int restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(sbi); - struct page *pages[bio_blocks]; - int i, idx, last_offset, nrpages, err = 0; + int i, idx, last_offset, nrpages; /* scan the node segment */ last_offset = sbi->blocks_per_seg; addr = START_BLOCK(sbi, segno); sum_entry = &sum->entries[0]; - for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { + for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { nrpages = min(last_offset - i, bio_blocks); /* readahead node pages */ - nrpages = ra_sum_pages(sbi, pages, addr, nrpages); - if (!nrpages) - return -ENOMEM; + ra_meta_pages(sbi, addr, nrpages, META_POR); - for (idx = 0; idx < nrpages; idx++) { - if (err) - goto skip; + for (idx = addr; idx < addr + nrpages; idx++) { + struct page *page = get_meta_page(sbi, idx); - lock_page(pages[idx]); - if (unlikely(!PageUptodate(pages[idx]))) { - err = -EIO; - } else { - rn = F2FS_NODE(pages[idx]); - sum_entry->nid = rn->footer.nid; - sum_entry->version = 0; - sum_entry->ofs_in_node = 0; - sum_entry++; - } - unlock_page(pages[idx]); -skip: - page_cache_release(pages[idx]); + rn = F2FS_NODE(page); + sum_entry->nid = rn->footer.nid; + sum_entry->version = 0; + sum_entry->ofs_in_node = 0; + sum_entry++; + f2fs_put_page(page, 1); } - invalidate_mapping_pages(inode->i_mapping, addr, + invalidate_mapping_pages(META_MAPPING(sbi), addr, addr + nrpages); } - return err; + return 0; } static void remove_nats_in_journal(struct f2fs_sb_info *sbi) From cf04e8eb55290c7b836c36f0b4e1a8d0fe8ee275 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:33:13 -0800 Subject: [PATCH 11/64] f2fs: use f2fs_io_info to clean up messy parameters during IO path This patch cleans up parameters on IO paths. The key idea is to use f2fs_io_info adding a parameter, block address, and then use this structure as parameters. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 24 ++++++++-------- fs/f2fs/data.c | 66 +++++++++++++++++++++++++++----------------- fs/f2fs/f2fs.h | 14 ++++++---- fs/f2fs/inline.c | 7 ++--- fs/f2fs/node.c | 14 +++++++--- fs/f2fs/segment.c | 28 +++++++++---------- 6 files changed, 87 insertions(+), 66 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 825158e4855a..0ac7c39605ed 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -50,6 +50,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); struct page *page; + struct f2fs_io_info fio = { + .type = META, + .rw = READ_SYNC | REQ_META | REQ_PRIO, + .blk_addr = index, + }; repeat: page = grab_cache_page(mapping, index); if (!page) { @@ -59,8 +64,7 @@ repeat: if (PageUptodate(page)) goto out; - if (f2fs_submit_page_bio(sbi, page, index, - READ_SYNC | REQ_META | REQ_PRIO)) + if (f2fs_submit_page_bio(sbi, page, &fio)) goto repeat; lock_page(page); @@ -112,14 +116,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; for (; nrpages-- > 0; blkno++) { - block_t blk_addr; if (!is_valid_blkaddr(sbi, blkno, type)) goto out; @@ -130,27 +132,27 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; /* get nat block addr */ - blk_addr = current_nat_addr(sbi, + fio.blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - blk_addr = current_sit_addr(sbi, + fio.blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); - if (blkno != start && prev_blk_addr + 1 != blk_addr) + if (blkno != start && prev_blk_addr + 1 != fio.blk_addr) goto out; - prev_blk_addr = blk_addr; + prev_blk_addr = fio.blk_addr; break; case META_SSA: case META_CP: case META_POR: - blk_addr = blkno; + fio.blk_addr = blkno; break; default: BUG(); } - page = grab_cache_page(META_MAPPING(sbi), blk_addr); + page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr); if (!page) continue; if (PageUptodate(page)) { @@ -158,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type continue; } - f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); + f2fs_submit_page_mbio(sbi, page, &fio); f2fs_put_page(page, 0); } out: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index caa08e46697a..d86f8b1413f1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -132,14 +132,14 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, * Return unlocked page. */ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, int rw) + struct f2fs_io_info *fio) { struct bio *bio; - trace_f2fs_submit_page_bio(page, blk_addr, rw); + trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw); /* Allocate a new bio */ - bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); + bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { bio_put(bio); @@ -147,12 +147,12 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, return -EFAULT; } - submit_bio(rw, bio); + submit_bio(fio->rw, bio); return 0; } void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, struct f2fs_io_info *fio) + struct f2fs_io_info *fio) { enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; @@ -160,21 +160,21 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, io = is_read ? &sbi->read_io : &sbi->write_io[btype]; - verify_block_addr(sbi, blk_addr); + verify_block_addr(sbi, fio->blk_addr); down_write(&io->io_rwsem); if (!is_read) inc_page_count(sbi, F2FS_WRITEBACK); - if (io->bio && (io->last_block_in_bio != blk_addr - 1 || + if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 || io->fio.rw != fio->rw)) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { int bio_blocks = MAX_BIO_BLOCKS(sbi); - io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); + io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read); io->fio = *fio; } @@ -184,10 +184,10 @@ alloc_new: goto alloc_new; } - io->last_block_in_bio = blk_addr; + io->last_block_in_bio = fio->blk_addr; up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); + trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr); } /* @@ -376,6 +376,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) struct dnode_of_data dn; struct page *page; int err; + struct f2fs_io_info fio = { + .type = DATA, + .rw = sync ? READ_SYNC : READA, + }; page = find_get_page(mapping, index); if (page && PageUptodate(page)) @@ -404,8 +408,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) return page; } - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, - sync ? READ_SYNC : READA); + fio.blk_addr = dn.data_blkaddr; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) return ERR_PTR(err); @@ -430,7 +434,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct dnode_of_data dn; struct page *page; int err; - + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + }; repeat: page = grab_cache_page(mapping, index); if (!page) @@ -464,8 +471,8 @@ repeat: return page; } - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); + fio.blk_addr = dn.data_blkaddr; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) return ERR_PTR(err); @@ -515,8 +522,12 @@ repeat: zero_user_segment(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); } else { - err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, - dn.data_blkaddr, READ_SYNC); + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + .blk_addr = dn.data_blkaddr, + }; + err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio); if (err) goto put_err; @@ -745,7 +756,6 @@ static int f2fs_read_data_pages(struct file *file, int do_write_data_page(struct page *page, struct f2fs_io_info *fio) { struct inode *inode = page->mapping->host; - block_t old_blkaddr, new_blkaddr; struct dnode_of_data dn; int err = 0; @@ -754,10 +764,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) if (err) return err; - old_blkaddr = dn.data_blkaddr; + fio->blk_addr = dn.data_blkaddr; /* This page is already truncated */ - if (old_blkaddr == NULL_ADDR) + if (fio->blk_addr == NULL_ADDR) goto out_writepage; set_page_writeback(page); @@ -766,14 +776,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (unlikely(old_blkaddr != NEW_ADDR && + if (unlikely(fio->blk_addr != NEW_ADDR && !is_cold_data(page) && need_inplace_update(inode))) { - rewrite_data_page(page, old_blkaddr, fio); + rewrite_data_page(page, fio); set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { - write_data_page(page, &dn, &new_blkaddr, fio); - update_extent_cache(new_blkaddr, &dn); + write_data_page(page, &dn, fio); + update_extent_cache(fio->blk_addr, &dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: @@ -1007,8 +1017,12 @@ put_next: if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); + struct f2fs_io_info fio = { + .type = DATA, + .rw = READ_SYNC, + .blk_addr = dn.data_blkaddr, + }; + err = f2fs_submit_page_bio(sbi, page, &fio); if (err) goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a8ccbceb869a..3f07b504f6d8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -491,6 +491,7 @@ enum page_type { struct f2fs_io_info { enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ + block_t blk_addr; /* block address to be written */ }; #define is_read_io(rw) (((rw) & 1) == READ) @@ -1414,10 +1415,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_io_info *, unsigned int, block_t, block_t *); -void write_data_page(struct page *, struct dnode_of_data *, block_t *, - struct f2fs_io_info *); -void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); + unsigned int, struct f2fs_io_info *); +void write_data_page(struct page *, struct dnode_of_data *, + struct f2fs_io_info *); +void rewrite_data_page(struct page *, struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, @@ -1464,8 +1465,9 @@ void destroy_checkpoint_caches(void); * data.c */ void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); -int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); -void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, +int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, + struct f2fs_io_info *); +void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index f2d3c581e776..0c3f3f9b9f88 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -79,7 +79,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { void *src_addr, *dst_addr; - block_t new_blk_addr; struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, @@ -115,9 +114,9 @@ no_update: /* write data page to try to make data consistent */ set_page_writeback(page); - - write_data_page(page, dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, dn); + fio.blk_addr = dn->data_blkaddr; + write_data_page(page, dn, &fio); + update_extent_cache(fio.blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) inode_dec_dirty_pages(dn->inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bcfd67c80196..adc35c978306 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -977,6 +977,10 @@ static int read_node_page(struct page *page, int rw) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; + struct f2fs_io_info fio = { + .type = NODE, + .rw = rw, + }; get_node_info(sbi, page->index, &ni); @@ -988,7 +992,8 @@ static int read_node_page(struct page *page, int rw) if (PageUptodate(page)) return LOCKED_PAGE; - return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw); + fio.blk_addr = ni.blk_addr; + return f2fs_submit_page_bio(sbi, page, &fio); } /* @@ -1269,7 +1274,6 @@ static int f2fs_write_node_page(struct page *page, { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; - block_t new_addr; struct node_info ni; struct f2fs_io_info fio = { .type = NODE, @@ -1304,9 +1308,11 @@ static int f2fs_write_node_page(struct page *page, } else { down_read(&sbi->node_write); } + set_page_writeback(page); - write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); - set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); + fio.blk_addr = ni.blk_addr; + write_node_page(sbi, page, nid, &fio); + set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); unlock_page(page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c950c9318e8e..c726f86c2ea0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1182,39 +1182,39 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, struct f2fs_io_info *fio) + struct f2fs_summary *sum, + struct f2fs_io_info *fio) { int type = __get_segment_type(page, fio->type); - allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); + allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); + f2fs_submit_page_mbio(sbi, page, fio); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { struct f2fs_io_info fio = { .type = META, - .rw = WRITE_SYNC | REQ_META | REQ_PRIO + .rw = WRITE_SYNC | REQ_META | REQ_PRIO, + .blk_addr = page->index, }; set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, page->index, &fio); + f2fs_submit_page_mbio(sbi, page, &fio); } void write_node_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_io_info *fio, - unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) + unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio); + do_write_page(sbi, page, &sum, fio); } void write_data_page(struct page *page, struct dnode_of_data *dn, - block_t *new_blkaddr, struct f2fs_io_info *fio) + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; @@ -1223,14 +1223,12 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - - do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); + do_write_page(sbi, page, &sum, fio); } -void rewrite_data_page(struct page *page, block_t old_blkaddr, - struct f2fs_io_info *fio) +void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) { - f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); + f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio); } void recover_data_page(struct f2fs_sb_info *sbi, From 63f92ddc8aee18838441179064338702a93326a9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:45:05 -0800 Subject: [PATCH 12/64] f2fs: add f2fs_io_tracer support This patch adds: o initial trace.c and trace.h with skeleton functions o Kconfig and Makefile to activate this feature Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 10 ++++++++++ fs/f2fs/Makefile | 1 + fs/f2fs/trace.c | 24 ++++++++++++++++++++++++ fs/f2fs/trace.h | 24 ++++++++++++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 fs/f2fs/trace.c create mode 100644 fs/f2fs/trace.h diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 736a348509f7..94e2d2ffabe1 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -71,3 +71,13 @@ config F2FS_CHECK_FS Enables BUG_ONs which check the filesystem consistency in runtime. If you want to improve the performance, say N. + +config F2FS_IO_TRACE + bool "F2FS IO tracer" + depends on F2FS_FS + depends on FUNCTION_TRACER + help + F2FS IO trace is based on a function trace, which gathers process + information and block IO patterns in the filesystem level. + + If unsure, say N. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 2e35da12d292..d92397731db8 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -5,3 +5,4 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o +f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c new file mode 100644 index 000000000000..4e4a0691d32f --- /dev/null +++ b/fs/f2fs/trace.c @@ -0,0 +1,24 @@ +/* + * f2fs IO tracer + * + * Copyright (c) 2014 Motorola Mobility + * Copyright (c) 2014 Jaegeuk Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#include "f2fs.h" +#include "trace.h" + +void f2fs_trace_pid(struct page *page) +{ +} + +void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) +{ +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h new file mode 100644 index 000000000000..08856a978d6b --- /dev/null +++ b/fs/f2fs/trace.h @@ -0,0 +1,24 @@ +/* + * f2fs IO tracer + * + * Copyright (c) 2014 Motorola Mobility + * Copyright (c) 2014 Jaegeuk Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __F2FS_TRACE_H__ +#define __F2FS_TRACE_H__ + +#ifdef CONFIG_F2FS_IO_TRACE +#include + +extern void f2fs_trace_pid(struct page *); +extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); +#else +#define f2fs_trace_pid(p) +#define f2fs_trace_ios(p, i, n) + +#endif +#endif /* __F2FS_TRACE_H__ */ From 0e689d036952a6018ed5f5c1aee7697f5c57cea1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:51:57 -0800 Subject: [PATCH 13/64] f2fs: add key functions for f2fs_io_tracer This patch adds two key functions to trace process ids and IOs. The basic idea is to 1. remain process ids, pids, in page->private. 2. show pids in IO traces. So, later we can retrieve process information according to IO traces. Signed-off-by: Jaegeuk Kim --- fs/f2fs/trace.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/trace.h | 18 +++++++++++ 2 files changed, 104 insertions(+) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 4e4a0691d32f..19f5216b9b9c 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -15,10 +15,96 @@ #include "f2fs.h" #include "trace.h" +RADIX_TREE(pids, GFP_NOIO); +struct last_io_info last_io; + +static inline void __print_last_io(void) +{ + if (!last_io.len) + return; + + trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n", + last_io.major, last_io.minor, + last_io.pid, "----------------", + last_io.type, + last_io.fio.rw, last_io.fio.blk_addr, + last_io.len); + memset(&last_io, 0, sizeof(last_io)); +} + +static int __file_type(struct inode *inode, pid_t pid) +{ + if (f2fs_is_atomic_file(inode)) + return __ATOMIC_FILE; + else if (f2fs_is_volatile_file(inode)) + return __VOLATILE_FILE; + else if (S_ISDIR(inode->i_mode)) + return __DIR_FILE; + else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode))) + return __NODE_FILE; + else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode))) + return __META_FILE; + else if (pid) + return __NORMAL_FILE; + else + return __MISC_FILE; +} + void f2fs_trace_pid(struct page *page) { + struct inode *inode = page->mapping->host; + pid_t pid = task_pid_nr(current); + void *p; + + page->private = pid; + + p = radix_tree_lookup(&pids, pid); + if (p == current) + return; + if (p) + radix_tree_delete(&pids, pid); + + f2fs_radix_tree_insert(&pids, pid, current); + + trace_printk("%3x:%3x %4x %-16s\n", + MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), + pid, current->comm); + } void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) { + struct inode *inode; + pid_t pid; + int major, minor; + + if (flush) { + __print_last_io(); + return; + } + + inode = page->mapping->host; + pid = page_private(page); + + major = MAJOR(inode->i_sb->s_dev); + minor = MINOR(inode->i_sb->s_dev); + + if (last_io.major == major && last_io.minor == minor && + last_io.pid == pid && + last_io.type == __file_type(inode, pid) && + last_io.fio.rw == fio->rw && + last_io.fio.blk_addr + last_io.len == fio->blk_addr) { + last_io.len++; + return; + } + + __print_last_io(); + + last_io.major = major; + last_io.minor = minor; + last_io.pid = pid; + last_io.type = __file_type(inode, pid); + last_io.fio = *fio; + last_io.len = 1; + return; } diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 08856a978d6b..aa6663be528c 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -14,6 +14,24 @@ #ifdef CONFIG_F2FS_IO_TRACE #include +enum file_type { + __NORMAL_FILE, + __DIR_FILE, + __NODE_FILE, + __META_FILE, + __ATOMIC_FILE, + __VOLATILE_FILE, + __MISC_FILE, +}; + +struct last_io_info { + int major, minor; + pid_t pid; + enum file_type type; + struct f2fs_io_info fio; + block_t len; +}; + extern void f2fs_trace_pid(struct page *); extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); #else From 9e4ded3f309eb5b5a9be0ca2acd26e5ea7f00914 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 19:58:58 -0800 Subject: [PATCH 14/64] f2fs: activate f2fs_trace_pid This patch activates f2fs_trace_pid. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/node.c | 2 ++ fs/f2fs/segment.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0ac7c39605ed..3999933f7a74 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -20,6 +20,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include static struct kmem_cache *ino_entry_slab; @@ -301,6 +302,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); + f2fs_trace_pid(page); return 1; } return 0; @@ -712,6 +714,7 @@ void update_dirty_page(struct inode *inode, struct page *page) kmem_cache_free(inode_entry_slab, new); out: SetPagePrivate(page); + f2fs_trace_pid(page); } void add_dirty_dir_inode(struct inode *inode) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index adc35c978306..a7cb0db2e3e8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -19,6 +19,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) @@ -1362,6 +1363,7 @@ static int f2fs_set_node_page_dirty(struct page *page) __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); + f2fs_trace_pid(page); return 1; } return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c726f86c2ea0..b688991514fb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -20,6 +20,7 @@ #include "f2fs.h" #include "segment.h" #include "node.h" +#include "trace.h" #include #define __reverse_ffz(x) __reverse_ffs(~(x)) @@ -181,6 +182,7 @@ void register_inmem_page(struct inode *inode, struct page *page) int err; SetPagePrivate(page); + f2fs_trace_pid(page); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); From db9f7c1a9561e998d6227bcc1c19bc4c1fbbca1b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 17 Dec 2014 20:04:08 -0800 Subject: [PATCH 15/64] f2fs: activate f2fs_trace_ios This patch activates f2fs_trace_ios. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/file.c | 2 ++ fs/f2fs/super.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d86f8b1413f1..20aa3c355a0e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -22,6 +22,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include "trace.h" #include static void f2fs_read_end_io(struct bio *bio, int err) @@ -137,6 +138,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, struct bio *bio; trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw); + f2fs_trace_ios(page, fio, 0); /* Allocate a new bio */ bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw)); @@ -185,6 +187,7 @@ alloc_new: } io->last_block_in_bio = fio->blk_addr; + f2fs_trace_ios(page, fio, 0); up_write(&io->io_rwsem); trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5139f90e92c1..f172ddc4ecdd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -26,6 +26,7 @@ #include "segment.h" #include "xattr.h" #include "acl.h" +#include "trace.h" #include static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, @@ -265,6 +266,7 @@ flush_out: ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + f2fs_trace_ios(NULL, NULL, 1); return ret; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f71421d70475..cf68e44570bc 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -30,6 +30,7 @@ #include "segment.h" #include "xattr.h" #include "gc.h" +#include "trace.h" #define CREATE_TRACE_POINTS #include @@ -493,6 +494,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) } else { f2fs_balance_fs(sbi); } + f2fs_trace_ios(NULL, NULL, 1); return 0; } From dd802406e396c22dfb5aa0d16196f04d515be49e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 18 Dec 2014 19:32:36 -0800 Subject: [PATCH 16/64] f2fs: avoid double lock for cp_rwsem The __f2fs_add_link is covered by cp_rwsem all the time. This calls init_inode_metadata, which conducts some acl operations including memory allocation with GFP_KERNEL previously. But, under memory pressure, f2fs_write_data_page can be called, which also grabs cp_rwsem too. In this case, this incurs a deadlock pointed by Chao. Thread #1 Thread #2 down_read down_write down_read -> here down_read should wait forever. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 1ccb26bc2a0b..7f12d28ad94c 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); @@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) int i; f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * - sizeof(struct f2fs_acl_entry), GFP_KERNEL); + sizeof(struct f2fs_acl_entry), GFP_NOFS); if (!f2fs_acl) return ERR_PTR(-ENOMEM); From b9a2c252071d44d4a22082611db84272be1f3b49 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Wed, 24 Dec 2014 02:16:54 +0900 Subject: [PATCH 17/64] f2fs: add block count by in-place-update in stat info This patch adds block count by in-place-update in stat. Signed-off-by: Changman Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++++ fs/f2fs/f2fs.h | 6 +++++- fs/f2fs/segment.c | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 91e8f699ab30..2b6422156ea0 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -79,6 +79,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; } + + si->inplace_count = atomic_read(&sbi->inplace_count); } /* @@ -277,6 +279,7 @@ static int stat_show(struct seq_file *s, void *v) for (j = 0; j < si->util_free; j++) seq_putc(s, '-'); seq_puts(s, "]\n\n"); + seq_printf(s, "IPU: %u blocks\n", si->inplace_count); seq_printf(s, "SSR: %u blocks in %u segments\n", si->block_count[SSR], si->segment_count[SSR]); seq_printf(s, "LFS: %u blocks in %u segments\n", @@ -331,6 +334,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); + atomic_set(&sbi->inplace_count, 0); mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f07b504f6d8..3226af06e050 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -592,6 +592,7 @@ struct f2fs_sb_info { struct f2fs_stat_info *stat_info; /* FS status information */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ + atomic_t inplace_count; /* # of inplace update */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ @@ -1523,6 +1524,7 @@ struct f2fs_stat_info { unsigned int segment_count[2]; unsigned int block_count[2]; + unsigned int inplace_count; unsigned base_mem, cache_mem; }; @@ -1562,7 +1564,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ ((sbi)->block_count[(curseg)->alloc_type]++) - +#define stat_inc_inplace_blocks(sbi) \ + (atomic_inc(&(sbi)->inplace_count)) #define stat_inc_seg_count(sbi, type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ @@ -1608,6 +1611,7 @@ void f2fs_destroy_root_stats(void); #define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) +#define stat_inc_inplace_blocks(sbi) #define stat_inc_seg_count(si, type) #define stat_inc_tot_blk_count(si, blks) #define stat_inc_data_blk_count(si, blks) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b688991514fb..f995a850dfe7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1230,6 +1230,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) { + stat_inc_inplace_blocks(F2FS_P_SB(page)); f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio); } From 09eb483e895f36fd002e88c878e9578c359aa468 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 23 Dec 2014 16:26:31 -0800 Subject: [PATCH 18/64] f2fs: fix missing cold bit during recovery In do_recover_data, we find and update previous node pages after updating its new block addresses. After then, we call fill_node_footer without reset field, we erase its cold bit so that this new cold node block is written to wrong log area. This patch fixes not to miss its old flag. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 10 +++++++++- include/linux/f2fs_fs.h | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index fa6f95968b42..cac8a3d9acbd 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -212,11 +212,19 @@ static inline void fill_node_footer(struct page *page, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { struct f2fs_node *rn = F2FS_NODE(page); + unsigned int old_flag = 0; + if (reset) memset(rn, 0, sizeof(*rn)); + else + old_flag = le32_to_cpu(rn->footer.flag); + rn->footer.nid = cpu_to_le32(nid); rn->footer.ino = cpu_to_le32(ino); - rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); + + /* should remain old flag bits such as COLD_BIT_SHIFT */ + rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) | + (old_flag & OFFSET_BIT_MASK)); } static inline void copy_node_footer(struct page *dst, struct page *src) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 87f14e90e984..e993b0bc9abf 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -224,6 +224,8 @@ enum { OFFSET_BIT_SHIFT }; +#define OFFSET_BIT_MASK (0x07) /* (0x01 << OFFSET_BIT_SHIFT) - 1 */ + struct node_footer { __le32 nid; /* node id */ __le32 ino; /* inode nunmber */ From 3e1c8f125eeea0f8111e2b9131162bfba32c6381 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 Dec 2014 16:35:21 +0800 Subject: [PATCH 19/64] f2fs: cleanup trace event of f2fs_submit_page_{m,}bio with DECLARE_EVENT_CLASS This patch adds missing parameter _type_ for trace_f2fs_submit_page_bio, then use DECLARE_EVENT_CLASS/DEFINE_EVENT_CONDITION pair to cleanup some trace event code related to f2fs_submit_page_{m,}bio. Additionally, after we remove redundant code, size of code can be reduced: text data bss dec hex filename 176787 8712 56 185555 2d4d3 f2fs.ko.org 174408 8648 56 183112 2cb48 f2fs.ko Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +- include/trace/events/f2fs.h | 115 ++++++++++++++++-------------------- 2 files changed, 53 insertions(+), 66 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 20aa3c355a0e..7953bc279205 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -137,7 +137,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, { struct bio *bio; - trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw); + trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw, fio->type); f2fs_trace_ios(page, fio, 0); /* Allocate a new bio */ @@ -190,7 +190,7 @@ alloc_new: f2fs_trace_ios(page, fio, 0); up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, fio->blk_addr); + trace_f2fs_submit_page_mbio(page, fio->blk_addr, fio->rw, fio->type); } /* diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index bbc4de9baef7..553311f14f6c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -440,38 +440,6 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, __entry->err) ); -TRACE_EVENT_CONDITION(f2fs_submit_page_bio, - - TP_PROTO(struct page *page, sector_t blkaddr, int type), - - TP_ARGS(page, blkaddr, type), - - TP_CONDITION(page->mapping), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(pgoff_t, index) - __field(sector_t, blkaddr) - __field(int, type) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->index = page->index; - __entry->blkaddr = blkaddr; - __entry->type = type; - ), - - TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "blkaddr = 0x%llx, bio_type = %s%s", - show_dev_ino(__entry), - (unsigned long)__entry->index, - (unsigned long long)__entry->blkaddr, - show_bio_type(__entry->type)) -); - TRACE_EVENT(f2fs_get_data_block, TP_PROTO(struct inode *inode, sector_t iblock, struct buffer_head *bh, int ret), @@ -680,6 +648,57 @@ TRACE_EVENT(f2fs_reserve_new_block, __entry->ofs_in_node) ); +DECLARE_EVENT_CLASS(f2fs__submit_page_bio, + + TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + + TP_ARGS(page, blkaddr, rw, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, index) + __field(block_t, blkaddr) + __field(int, rw) + __field(int, type) + ), + + TP_fast_assign( + __entry->dev = page->mapping->host->i_sb->s_dev; + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->blkaddr = blkaddr; + __entry->rw = rw; + __entry->type = type; + ), + + TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + "blkaddr = 0x%llx, rw = %s%s, type = %s", + show_dev_ino(__entry), + (unsigned long)__entry->index, + (unsigned long long)__entry->blkaddr, + show_bio_type(__entry->rw), + show_block_type(__entry->type)) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, + + TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + + TP_ARGS(page, blkaddr, rw, type), + + TP_CONDITION(page->mapping) +); + +DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, + + TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + + TP_ARGS(page, blkaddr, rw, type), + + TP_CONDITION(page->mapping) +); + DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), @@ -916,38 +935,6 @@ TRACE_EVENT(f2fs_writepages, __entry->for_sync) ); -TRACE_EVENT(f2fs_submit_page_mbio, - - TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), - - TP_ARGS(page, rw, type, blk_addr), - - TP_STRUCT__entry( - __field(dev_t, dev) - __field(ino_t, ino) - __field(int, rw) - __field(int, type) - __field(pgoff_t, index) - __field(block_t, block) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->ino = page->mapping->host->i_ino; - __entry->rw = rw; - __entry->type = type; - __entry->index = page->index; - __entry->block = blk_addr; - ), - - TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx", - show_dev_ino(__entry), - show_bio_type(__entry->rw), - show_block_type(__entry->type), - (unsigned long)__entry->index, - (unsigned long long)__entry->block) -); - TRACE_EVENT(f2fs_write_checkpoint, TP_PROTO(struct super_block *sb, int reason, char *msg), From 2ace38e00e54f5c722d8c5eba36d1172548a3466 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Dec 2014 16:08:14 +0800 Subject: [PATCH 20/64] f2fs: cleanup parameters for trace_f2fs_submit_{read_,write_,page_,page_m}bio with fio Cleanup parameters for trace_f2fs_submit_{read_,write_,page_,page_m}bio with fio as one parameter. Suggested-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++------ include/trace/events/f2fs.h | 37 ++++++++++++++++++++----------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7953bc279205..6308435028f6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -96,11 +96,9 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) return; if (is_read_io(fio->rw)) - trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); else - trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, - fio->type, io->bio); + trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); submit_bio(fio->rw, io->bio); io->bio = NULL; @@ -137,7 +135,7 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, { struct bio *bio; - trace_f2fs_submit_page_bio(page, fio->blk_addr, fio->rw, fio->type); + trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(page, fio, 0); /* Allocate a new bio */ @@ -190,7 +188,7 @@ alloc_new: f2fs_trace_ios(page, fio, 0); up_write(&io->io_rwsem); - trace_f2fs_submit_page_mbio(page, fio->blk_addr, fio->rw, fio->type); + trace_f2fs_submit_page_mbio(page, fio); } /* diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 553311f14f6c..13992f3c1445 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -650,9 +650,9 @@ TRACE_EVENT(f2fs_reserve_new_block, DECLARE_EVENT_CLASS(f2fs__submit_page_bio, - TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + TP_PROTO(struct page *page, struct f2fs_io_info *fio), - TP_ARGS(page, blkaddr, rw, type), + TP_ARGS(page, fio), TP_STRUCT__entry( __field(dev_t, dev) @@ -667,9 +667,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->dev = page->mapping->host->i_sb->s_dev; __entry->ino = page->mapping->host->i_ino; __entry->index = page->index; - __entry->blkaddr = blkaddr; - __entry->rw = rw; - __entry->type = type; + __entry->blkaddr = fio->blk_addr; + __entry->rw = fio->rw; + __entry->type = fio->type; ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " @@ -683,27 +683,28 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, - TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + TP_PROTO(struct page *page, struct f2fs_io_info *fio), - TP_ARGS(page, blkaddr, rw, type), + TP_ARGS(page, fio), TP_CONDITION(page->mapping) ); DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, - TP_PROTO(struct page *page, block_t blkaddr, int rw, int type), + TP_PROTO(struct page *page, struct f2fs_io_info *fio), - TP_ARGS(page, blkaddr, rw, type), + TP_ARGS(page, fio), TP_CONDITION(page->mapping) ); DECLARE_EVENT_CLASS(f2fs__submit_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_STRUCT__entry( __field(dev_t, dev) @@ -715,8 +716,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->rw = rw; - __entry->type = type; + __entry->rw = fio->rw; + __entry->type = fio->type; __entry->sector = bio->bi_iter.bi_sector; __entry->size = bio->bi_iter.bi_size; ), @@ -731,18 +732,20 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, - TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio), + TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, + struct bio *bio), - TP_ARGS(sb, rw, type, bio), + TP_ARGS(sb, fio, bio), TP_CONDITION(bio) ); From 062920734c0de9dd4f0a9bdc36fdcabc2751eb34 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 29 Dec 2014 15:56:18 +0800 Subject: [PATCH 21/64] f2fs: reuse inode_entry_slab in gc procedure for using slab more effectively There are two slab cache inode_entry_slab and winode_slab using the same structure as below: struct dir_inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ }; struct inode_entry { struct list_head list; struct inode *inode; }; It's a little waste that the two cache can not share their memory space for each other. So in this patch we remove one redundant winode_slab slab cache, then use more universal name struct inode_entry as remaining data structure name of slab, finally we reuse the inode_entry_slab to store dirty dir item and gc item for more effective. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 18 +++++++++--------- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 14 +++++++++----- fs/f2fs/gc.c | 20 ++------------------ fs/f2fs/gc.h | 7 ++----- fs/f2fs/super.c | 8 +------- 6 files changed, 24 insertions(+), 45 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3999933f7a74..9f5317c9ad72 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -24,7 +24,7 @@ #include static struct kmem_cache *ino_entry_slab; -static struct kmem_cache *inode_entry_slab; +struct kmem_cache *inode_entry_slab; /* * We guarantee no failure on the returned page. @@ -673,7 +673,7 @@ fail_no_cp: return -EINVAL; } -static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) +static int __add_dirty_inode(struct inode *inode, struct inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -690,7 +690,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) void update_dirty_page(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new; + struct inode_entry *new; int ret = 0; if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) @@ -720,7 +720,7 @@ out: void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *new = + struct inode_entry *new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); int ret = 0; @@ -738,7 +738,7 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dir_inode_entry *entry; + struct inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; @@ -768,7 +768,7 @@ void remove_dirty_dir_inode(struct inode *inode) void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; - struct dir_inode_entry *entry; + struct inode_entry *entry; struct inode *inode; retry: if (unlikely(f2fs_cp_error(sbi))) @@ -781,7 +781,7 @@ retry: spin_unlock(&sbi->dir_inode_lock); return; } - entry = list_entry(head->next, struct dir_inode_entry, list); + entry = list_entry(head->next, struct inode_entry, list); inode = igrab(entry->inode); spin_unlock(&sbi->dir_inode_lock); if (inode) { @@ -1107,8 +1107,8 @@ int __init create_checkpoint_caches(void) sizeof(struct ino_entry)); if (!ino_entry_slab) return -ENOMEM; - inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", - sizeof(struct dir_inode_entry)); + inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry", + sizeof(struct inode_entry)); if (!inode_entry_slab) { kmem_cache_destroy(ino_entry_slab); return -ENOMEM; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 2b6422156ea0..dd7835b27498 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -172,7 +172,7 @@ get_cache: si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3226af06e050..c48847e06dae 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -136,8 +136,14 @@ struct ino_entry { nid_t ino; /* inode number */ }; -/* for the list of directory inodes */ -struct dir_inode_entry { +/* + * for the list of directory inodes or gc inodes. + * NOTE: there are two slab users for this structure, if we add/modify/delete + * fields in structure for one of slab users, it may affect fields or size of + * other one, in this condition, it's better to split both of slab and related + * data structure. + */ +struct inode_entry { struct list_head list; /* list head */ struct inode *inode; /* vfs inode pointer */ }; @@ -297,7 +303,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ - struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct inode_entry *dirty_dir; /* the pointer of dirty dir */ struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ @@ -1487,8 +1493,6 @@ void stop_gc_thread(struct f2fs_sb_info *); block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); int f2fs_gc(struct f2fs_sb_info *); void build_gc_manager(struct f2fs_sb_info *); -int __init create_gc_caches(void); -void destroy_gc_caches(void); /* * recovery.c diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index eec0933a4819..40887d3c9d01 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -24,8 +24,6 @@ #include "gc.h" #include -static struct kmem_cache *winode_slab; - static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; @@ -356,7 +354,7 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); + new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new_ie->inode = inode; retry: if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { @@ -373,7 +371,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list) radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); - kmem_cache_free(winode_slab, ie); + kmem_cache_free(inode_entry_slab, ie); } } @@ -750,17 +748,3 @@ void build_gc_manager(struct f2fs_sb_info *sbi) { DIRTY_I(sbi)->v_ops = &default_v_ops; } - -int __init create_gc_caches(void) -{ - winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", - sizeof(struct inode_entry)); - if (!winode_slab) - return -ENOMEM; - return 0; -} - -void destroy_gc_caches(void) -{ - kmem_cache_destroy(winode_slab); -} diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 6ff7ad38463e..524543a6a34a 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -35,16 +35,13 @@ struct f2fs_gc_kthread { unsigned int gc_idle; }; -struct inode_entry { - struct list_head list; - struct inode *inode; -}; - struct gc_inode_list { struct list_head ilist; struct radix_tree_root iroot; }; +extern struct kmem_cache *inode_entry_slab; + /* * inline functions */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cf68e44570bc..0ae6a2fbdb2a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1231,12 +1231,9 @@ static int __init init_f2fs_fs(void) err = create_segment_manager_caches(); if (err) goto free_node_manager_caches; - err = create_gc_caches(); - if (err) - goto free_segment_manager_caches; err = create_checkpoint_caches(); if (err) - goto free_gc_caches; + goto free_segment_manager_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); if (!f2fs_kset) { err = -ENOMEM; @@ -1253,8 +1250,6 @@ free_kset: kset_unregister(f2fs_kset); free_checkpoint_caches: destroy_checkpoint_caches(); -free_gc_caches: - destroy_gc_caches(); free_segment_manager_caches: destroy_segment_manager_caches(); free_node_manager_caches: @@ -1271,7 +1266,6 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); destroy_checkpoint_caches(); - destroy_gc_caches(); destroy_segment_manager_caches(); destroy_node_manager_caches(); destroy_inodecache(); From e1509cf294cc670cda1fedd430f0ff175c42b591 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Dec 2014 22:57:55 -0800 Subject: [PATCH 22/64] f2fs: clean up to remove parameter This patch uses dn->data_blkaddr as a parameter for the destination block address. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 27 ++++++++++++++------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 3 ++- fs/f2fs/inline.c | 2 +- fs/f2fs/recovery.c | 3 ++- fs/f2fs/segment.c | 1 + 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6308435028f6..2c0cb6617918 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -197,7 +197,7 @@ alloc_new: * ->node_page * update block addresses in the node page */ -static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) +static void __set_data_blkaddr(struct dnode_of_data *dn) { struct f2fs_node *rn; __le32 *addr_array; @@ -210,7 +210,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); - addr_array[ofs_in_node] = cpu_to_le32(new_addr); + addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr); set_page_dirty(node_page); } @@ -225,8 +225,8 @@ int reserve_new_block(struct dnode_of_data *dn) trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); - __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + __set_data_blkaddr(dn); mark_inode_dirty(dn->inode); sync_inode_page(dn); return 0; @@ -291,19 +291,19 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } -void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) +void update_extent_cache(struct dnode_of_data *dn) { struct f2fs_inode_info *fi = F2FS_I(dn->inode); pgoff_t fofs, start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; int need_update = true; - f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); + f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; /* Update the page address in the parent node */ - __set_data_blkaddr(dn, blk_addr); + __set_data_blkaddr(dn); if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; @@ -321,16 +321,16 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) /* Initial extent */ if (fi->ext.len == 0) { - if (blk_addr != NULL_ADDR) { + if (dn->data_blkaddr != NULL_ADDR) { fi->ext.fofs = fofs; - fi->ext.blk_addr = blk_addr; + fi->ext.blk_addr = dn->data_blkaddr; fi->ext.len = 1; } goto end_update; } /* Front merge */ - if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { + if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { fi->ext.fofs--; fi->ext.blk_addr--; fi->ext.len++; @@ -338,7 +338,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) } /* Back merge */ - if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { + if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { fi->ext.len++; goto end_update; } @@ -572,8 +572,8 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; - __set_data_blkaddr(dn, NEW_ADDR); dn->data_blkaddr = NEW_ADDR; + __set_data_blkaddr(dn); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); @@ -584,7 +584,8 @@ static int __allocate_data_block(struct dnode_of_data *dn) /* direct IO doesn't use extent cache to maximize the performance */ set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); - update_extent_cache(new_blkaddr, dn); + dn->data_blkaddr = new_blkaddr; + update_extent_cache(dn); clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); /* update i_size */ @@ -784,7 +785,7 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { write_data_page(page, &dn, fio); - update_extent_cache(fio->blk_addr, &dn); + update_extent_cache(&dn); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c48847e06dae..37e935a362cd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1478,7 +1478,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, struct f2fs_io_info *); int reserve_new_block(struct dnode_of_data *); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); -void update_extent_cache(block_t, struct dnode_of_data *); +void update_extent_cache(struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f172ddc4ecdd..5df336757d6c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -429,7 +429,8 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (blkaddr == NULL_ADDR) continue; - update_extent_cache(NULL_ADDR, dn); + dn->data_blkaddr = NULL_ADDR; + update_extent_cache(dn); invalidate_blocks(sbi, blkaddr); nr_free++; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 0c3f3f9b9f88..fa2aa2f7bb96 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -116,7 +116,7 @@ no_update: set_page_writeback(page); fio.blk_addr = dn->data_blkaddr; write_data_page(page, dn, &fio); - update_extent_cache(fio.blk_addr, dn); + update_extent_cache(dn); f2fs_wait_on_page_writeback(page, DATA); if (dirty) inode_dec_dirty_pages(dn->inode); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9160a37e1c7a..c4211a5862df 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -396,7 +396,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); - update_extent_cache(dest, &dn); + dn.data_blkaddr = dest; + update_extent_cache(&dn); recovered++; } dn.ofs_in_node++; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f995a850dfe7..8144a30b1567 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1226,6 +1226,7 @@ void write_data_page(struct page *page, struct dnode_of_data *dn, get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(sbi, page, &sum, fio); + dn->data_blkaddr = fio->blk_addr; } void rewrite_data_page(struct page *page, struct f2fs_io_info *fio) From 3547ea961dd66a474c6f709c4f5e8a2472289df9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Dec 2014 23:08:26 -0800 Subject: [PATCH 23/64] f2fs: avoid potential unnecessary codes This patch relocates some operations to avoid unnecessary execution. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- fs/f2fs/node.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2c0cb6617918..155885bf714c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -299,8 +299,6 @@ void update_extent_cache(struct dnode_of_data *dn) int need_update = true; f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + - dn->ofs_in_node; /* Update the page address in the parent node */ __set_data_blkaddr(dn); @@ -308,6 +306,9 @@ void update_extent_cache(struct dnode_of_data *dn) if (is_inode_flag_set(fi, FI_NO_EXTENT)) return; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; + write_lock(&fi->ext.ext_lock); start_fofs = fi->ext.fofs; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a7cb0db2e3e8..9bed0161efee 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -348,7 +348,6 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct nat_entry *e; int i; - memset(&ne, 0, sizeof(struct f2fs_nat_entry)); ni->nid = nid; /* Check nat cache */ @@ -363,6 +362,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) if (e) return; + memset(&ne, 0, sizeof(struct f2fs_nat_entry)); + /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); From 41ef94b35c8df8e01780b4a3362246c51f3aa79b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Dec 2014 23:12:11 -0800 Subject: [PATCH 24/64] f2fs: remove uncovered code path This patch removes unnecessary function calls. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 155885bf714c..3e0f5f303c97 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -563,31 +563,22 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; - block_t new_blkaddr; struct node_info ni; pgoff_t fofs; - int type; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) return -ENOSPC; - dn->data_blkaddr = NEW_ADDR; - __set_data_blkaddr(dn); - get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - type = CURSEG_WARM_DATA; - - allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); + allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, + CURSEG_WARM_DATA); /* direct IO doesn't use extent cache to maximize the performance */ - set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); - dn->data_blkaddr = new_blkaddr; - update_extent_cache(dn); - clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); + __set_data_blkaddr(dn); /* update i_size */ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + @@ -595,7 +586,6 @@ static int __allocate_data_block(struct dnode_of_data *dn) if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); - dn->data_blkaddr = new_blkaddr; return 0; } From 38aa0889b2504bbe68e47f51cf73bf7f0a7246bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 5 Jan 2015 16:02:20 -0800 Subject: [PATCH 25/64] f2fs: align direct_io'ed data to section This patch aligns the start block address of a file for direct io to the f2fs's section size. Some flash devices manage an over 4KB-sized page as a write unit, and if the direct_io'ed data are written but not aligned to that unit, the performance can be degraded due to the partial page copies. Thus, since f2fs has a section that is well aligned to FTL units, we can align the block address to the section size so that f2fs avoids this misalignment. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 27 +++++++++++++++++++-------- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3e0f5f303c97..b48b355104c3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -564,6 +564,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) struct f2fs_inode_info *fi = F2FS_I(dn->inode); struct f2fs_summary sum; struct node_info ni; + int seg = CURSEG_WARM_DATA; pgoff_t fofs; if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) @@ -574,8 +575,10 @@ static int __allocate_data_block(struct dnode_of_data *dn) get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, - CURSEG_WARM_DATA); + if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) + seg = CURSEG_DIRECT_IO; + + allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); /* direct IO doesn't use extent cache to maximize the performance */ __set_data_blkaddr(dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 37e935a362cd..ba30218770da 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -406,7 +406,8 @@ enum { CURSEG_HOT_NODE, /* direct node blocks of directory files */ CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ - NO_CHECK_TYPE + NO_CHECK_TYPE, + CURSEG_DIRECT_IO, /* to use for the direct IO path */ }; struct flush_cmd { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8144a30b1567..31c4e5702c7d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1024,18 +1024,22 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int old_segno; + + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + locate_dirty_segment(sbi, old_segno); +} + void allocate_new_segments(struct f2fs_sb_info *sbi) { - struct curseg_info *curseg; - unsigned int old_curseg; int i; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - curseg = CURSEG_I(sbi, i); - old_curseg = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_curseg); - } + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + __allocate_new_segments(sbi, i); } static const struct segment_allocation default_salloc_ops = { @@ -1148,11 +1152,18 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; + bool direct_io = (type == CURSEG_DIRECT_IO); + + type = direct_io ? CURSEG_WARM_DATA : type; curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); + /* direct_io'ed data is aligned to the segment for better performance */ + if (direct_io && curseg->next_blkoff) + __allocate_new_segments(sbi, type); + *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); /* From 9e5ba77fdbe3fea86b1e36903dd696b24b1c4607 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 Jan 2015 14:28:43 +0800 Subject: [PATCH 26/64] f2fs: get rid of kzalloc in __recover_inline_status We use kzalloc to allocate memory in __recover_inline_status, and use this all-zero memory to check the inline date content of inode page by comparing them. This is low effective and not needed, let's check inline date content directly. Signed-off-by: Chao Yu [Jaegeuk Kim: make the code more neat] Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 3a8958d1684f..2d002e3738a7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -67,29 +67,23 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } -static int __recover_inline_status(struct inode *inode, struct page *ipage) +static void __recover_inline_status(struct inode *inode, struct page *ipage) { void *inline_data = inline_data_addr(ipage); - struct f2fs_inode *ri; - void *zbuf; + __le32 *start = inline_data; + __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32); - zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); - if (!zbuf) - return -ENOMEM; + while (start < end) { + if (*start++) { + f2fs_wait_on_page_writeback(ipage, NODE); - if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { - kfree(zbuf); - return 0; + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage)); + set_page_dirty(ipage); + return; + } } - kfree(zbuf); - - f2fs_wait_on_page_writeback(ipage, NODE); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); - - ri = F2FS_INODE(ipage); - set_raw_inline(F2FS_I(inode), ri); - set_page_dirty(ipage); - return 0; + return; } static int do_read_inode(struct inode *inode) @@ -98,7 +92,6 @@ static int do_read_inode(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; - int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -142,7 +135,7 @@ static int do_read_inode(struct inode *inode) /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) - err = __recover_inline_status(inode, node_page); + __recover_inline_status(inode, node_page); /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); @@ -152,7 +145,7 @@ static int do_read_inode(struct inode *inode) stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); - return err; + return 0; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) From df1991391f9301efbd5e46fb776bf5103d4c59f8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Jan 2015 16:00:03 -0800 Subject: [PATCH 27/64] f2fs: fix wrong unlock_page call This patch removes wrongly called unlock_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b48b355104c3..a7b905cb05f8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -837,7 +837,6 @@ write: /* we should bypass data pages to proceed the kworkder jobs */ if (unlikely(f2fs_cp_error(sbi))) { SetPageError(page); - unlock_page(page); goto out; } From 7aed0d45376e531330cf20af581a76edc0347d06 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 10:47:57 -0800 Subject: [PATCH 28/64] f2fs: free radix_tree_nodes used by nat_set entries In the normal case, the radix_tree_nodes are freed successfully. But, when cp_error was detected, we should destroy them forcefully. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 21 +++++++++++++++++++-- fs/f2fs/node.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9bed0161efee..d7c143626705 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1894,7 +1894,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - struct nat_entry_set *setvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; struct nat_entry_set *set, *tmp; unsigned int found; nid_t set_idx = 0; @@ -1911,7 +1911,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) remove_nats_in_journal(sbi); while ((found = __gang_lookup_nat_set(nm_i, - set_idx, NATVEC_SIZE, setvec))) { + set_idx, SETVEC_SIZE, setvec))) { unsigned idx; set_idx = setvec[found - 1]->set + 1; for (idx = 0; idx < found; idx++) @@ -1991,6 +1991,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *next_i; struct nat_entry *natvec[NATVEC_SIZE]; + struct nat_entry_set *setvec[SETVEC_SIZE]; nid_t nid = 0; unsigned int found; @@ -2015,11 +2016,27 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; + nid = nat_get_nid(natvec[found - 1]) + 1; for (idx = 0; idx < found; idx++) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); + + /* destroy nat set cache */ + nid = 0; + while ((found = __gang_lookup_nat_set(nm_i, + nid, SETVEC_SIZE, setvec))) { + unsigned idx; + + nid = setvec[found - 1]->set + 1; + for (idx = 0; idx < found; idx++) { + /* entry_cnt is not zero, when cp_error was occurred */ + f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list)); + radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set); + kmem_cache_free(nat_entry_set_slab, setvec[idx]); + } + } up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index cac8a3d9acbd..f405bbf2435a 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -25,6 +25,7 @@ /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 +#define SETVEC_SIZE 32 /* return value for read_node_page */ #define LOCKED_PAGE 1 From dd4e4b59b1a4a7e69083e7dc2abbedb5186fd850 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 11:09:37 -0800 Subject: [PATCH 29/64] f2fs: add nat/sit entries into status This patch adds NAT/SIT entry informations. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 8 +++++--- fs/f2fs/f2fs.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index dd7835b27498..1f0fb58072e7 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -57,7 +57,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->node_pages = NODE_MAPPING(sbi)->nrpages; si->meta_pages = META_MAPPING(sbi)->nrpages; si->nats = NM_I(sbi)->nat_cnt; - si->sits = SIT_I(sbi)->dirty_sentries; + si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; + si->sits = MAIN_SEGS(sbi); + si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->fnids = NM_I(sbi)->fcnt; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -260,8 +262,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_dent, si->ndirty_dirs); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); - seq_printf(s, " - NATs: %9d\n - SITs: %9d\n", - si->nats, si->sits); + seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", + si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d\n", si->fnids); seq_puts(s, "\nDistribution of User Blocks:"); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ba30218770da..209532cbee43 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1512,7 +1512,7 @@ struct f2fs_stat_info { int main_area_segs, main_area_sections, main_area_zones; int hit_ext, total_ext; int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; - int nats, sits, fnids; + int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int valid_count, valid_node_count, valid_inode_count; From c05086506f2204bcf02c9d3eb7950cdf0d8a3bef Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 14:07:36 -0800 Subject: [PATCH 30/64] f2fs: add spin_lock to cover radix operations in IO tracer This patch adds spin_lock to cover radix tree operations in IO tracer. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 ++ fs/f2fs/trace.c | 18 +++++++++++++++--- fs/f2fs/trace.h | 2 ++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0ae6a2fbdb2a..e6f035c868d3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1222,6 +1222,8 @@ static int __init init_f2fs_fs(void) { int err; + f2fs_build_trace_ios(); + err = init_inodecache(); if (err) goto fail; diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 19f5216b9b9c..92fa38a47e63 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -15,7 +15,8 @@ #include "f2fs.h" #include "trace.h" -RADIX_TREE(pids, GFP_NOIO); +RADIX_TREE(pids, GFP_ATOMIC); +spinlock_t pids_lock; struct last_io_info last_io; static inline void __print_last_io(void) @@ -58,9 +59,13 @@ void f2fs_trace_pid(struct page *page) page->private = pid; + if (radix_tree_preload(GFP_NOFS)) + return; + + spin_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) - return; + goto out; if (p) radix_tree_delete(&pids, pid); @@ -69,7 +74,9 @@ void f2fs_trace_pid(struct page *page) trace_printk("%3x:%3x %4x %-16s\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); - +out: + spin_unlock(&pids_lock); + radix_tree_preload_end(); } void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) @@ -108,3 +115,8 @@ void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush) last_io.len = 1; return; } + +void f2fs_build_trace_ios(void) +{ + spin_lock_init(&pids_lock); +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index aa6663be528c..eb39fa08cd45 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -34,9 +34,11 @@ struct last_io_info { extern void f2fs_trace_pid(struct page *); extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); +extern void f2fs_build_trace_ios(void); #else #define f2fs_trace_pid(p) #define f2fs_trace_ios(p, i, n) +#define f2fs_build_trace_ios() #endif #endif /* __F2FS_TRACE_H__ */ From 351f4fba843db3303a92897b21a3d4dff4b6c717 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 7 Jan 2015 14:09:48 -0800 Subject: [PATCH 31/64] f2fs: add f2fs_destroy_trace_ios to free radix tree This patch removes radix tree after finishing tracing IOs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + fs/f2fs/trace.c | 37 +++++++++++++++++++++++++++++++++++++ fs/f2fs/trace.h | 2 ++ 3 files changed, 40 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e6f035c868d3..0e97974bbbbd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1272,6 +1272,7 @@ static void __exit exit_f2fs_fs(void) destroy_node_manager_caches(); destroy_inodecache(); kset_unregister(f2fs_kset); + f2fs_destroy_trace_ios(); } module_init(init_f2fs_fs) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 92fa38a47e63..b3570dcca945 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "f2fs.h" #include "trace.h" @@ -120,3 +121,39 @@ void f2fs_build_trace_ios(void) { spin_lock_init(&pids_lock); } + +#define PIDVEC_SIZE 128 +static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, + unsigned int max_items) +{ + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; + + if (unlikely(!max_items)) + return 0; + + radix_tree_for_each_slot(slot, &pids, &iter, first_index) { + results[ret] = iter.index; + if (++ret == PIDVEC_SIZE) + break; + } + return ret; +} + +void f2fs_destroy_trace_ios(void) +{ + pid_t pid[PIDVEC_SIZE]; + pid_t next_pid = 0; + unsigned int found; + + spin_lock(&pids_lock); + while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { + unsigned idx; + + next_pid = pid[found - 1] + 1; + for (idx = 0; idx < found; idx++) + radix_tree_delete(&pids, pid[idx]); + } + spin_unlock(&pids_lock); +} diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index eb39fa08cd45..1041dbeb52ae 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -35,10 +35,12 @@ struct last_io_info { extern void f2fs_trace_pid(struct page *); extern void f2fs_trace_ios(struct page *, struct f2fs_io_info *, int); extern void f2fs_build_trace_ios(void); +extern void f2fs_destroy_trace_ios(void); #else #define f2fs_trace_pid(p) #define f2fs_trace_ios(p, i, n) #define f2fs_build_trace_ios() +#define f2fs_destroy_trace_ios() #endif #endif /* __F2FS_TRACE_H__ */ From 08e4126e1e91b01bea5c71980e7008ec5075d701 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Fri, 9 Jan 2015 03:38:38 +0800 Subject: [PATCH 32/64] f2fs: pids_lock can be static fs/f2fs/trace.c:19:12: sparse: symbol 'pids_lock' was not declared. Should it be static? Signed-off-by: Fengguang Wu Signed-off-by: Jaegeuk Kim --- fs/f2fs/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index b3570dcca945..ce01a2c903bd 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -17,7 +17,7 @@ #include "trace.h" RADIX_TREE(pids, GFP_ATOMIC); -spinlock_t pids_lock; +static spinlock_t pids_lock; struct last_io_info last_io; static inline void __print_last_io(void) From 871f599f4a869d24ef98b0217f19f0cc55ff59ac Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Jan 2015 16:27:17 -0800 Subject: [PATCH 33/64] f2fs: avoid infinite loop on cp_error If cp_error is set, we should avoid all the infinite loop. In f2fs_sync_file, there is a hole, and this patch fixes that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5df336757d6c..710adc987937 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -247,6 +247,10 @@ go_write: sync_nodes: sync_node_pages(sbi, ino, &wbc); + /* if cp_error was enabled, we should avoid infinite loop */ + if (unlikely(f2fs_cp_error(sbi))) + goto out; + if (need_inode_block_update(sbi, ino)) { mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); From 9066c6a7eb5492dda0ccd48bb78be52feff9043c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 10 Jan 2015 20:09:52 +0800 Subject: [PATCH 34/64] f2fs: fix wrong memory footprint statistics in debugfs Our value of memory footprint statistics showed in debugfs is not calculated correctly. Fix it in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 1f0fb58072e7..b45daab97f0b 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -163,13 +163,20 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); - /* build gc */ - si->base_mem += sizeof(struct f2fs_gc_kthread); - get_cache: + si->cache_mem = 0; + + /* build gc */ + if (sbi->gc_thread) + si->cache_mem += sizeof(struct f2fs_gc_kthread); + + /* build merge flush thread */ + if (SM_I(sbi)->cmd_control_info) + si->cache_mem += sizeof(struct flush_cmd_control); + /* free nids */ - si->cache_mem = NM_I(sbi)->fcnt; - si->cache_mem += NM_I(sbi)->nat_cnt; + si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); + si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); npages = NODE_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; From 6f0aacbc3c1d71078d0f9eb47f8c422bb58fffd7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 10 Jan 2015 21:37:36 -0800 Subject: [PATCH 35/64] f2fs: update memory footprint information This patch adds missing memory usages, and splits them in detail. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 24 +++++++++++++++++------- fs/f2fs/f2fs.h | 2 +- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b45daab97f0b..0f721f6a1147 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -177,13 +177,18 @@ get_cache: /* free nids */ si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid); si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); - npages = NODE_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; - npages = META_MAPPING(sbi)->nrpages; - si->cache_mem += npages << PAGE_CACHE_SHIFT; + si->cache_mem += NM_I(sbi)->dirty_nat_cnt * + sizeof(struct nat_entry_set); + si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); for (i = 0; i <= UPDATE_INO; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); + + si->page_mem = 0; + npages = NODE_MAPPING(sbi)->nrpages; + si->page_mem += npages << PAGE_CACHE_SHIFT; + npages = META_MAPPING(sbi)->nrpages; + si->page_mem += npages << PAGE_CACHE_SHIFT; } static int stat_show(struct seq_file *s, void *v) @@ -301,9 +306,14 @@ static int stat_show(struct seq_file *s, void *v) /* memory footprint */ update_mem_info(si->sbi); - seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", - (si->base_mem + si->cache_mem) >> 10, - si->base_mem >> 10, si->cache_mem >> 10); + seq_printf(s, "\nMemory: %u KB\n", + (si->base_mem + si->cache_mem + si->page_mem) >> 10); + seq_printf(s, " - static: %u KB\n", + si->base_mem >> 10); + seq_printf(s, " - cached: %u KB\n", + si->cache_mem >> 10); + seq_printf(s, " - paged : %u KB\n", + si->page_mem >> 10); } mutex_unlock(&f2fs_stat_mutex); return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 209532cbee43..c2cf040d5284 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1530,7 +1530,7 @@ struct f2fs_stat_info { unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; - unsigned base_mem, cache_mem; + unsigned base_mem, cache_mem, page_mem; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) From 30a5537f9a9e91937aad6a47f55683f7ce0be257 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Jan 2015 16:34:24 -0800 Subject: [PATCH 36/64] f2fs: trigger correct checkpoint during umount This patch fixes to trigger checkpoint with umount flag when kill_sb was called. In kill_sb, f2fs_sync_fs was finally called, but at this time, f2fs can't do checkpoint with CP_UMOUNT. After then, f2fs_put_super is not doing checkpoint, since it is not dirty. So, this patch adds a flag to indicate f2fs_sync_fs is called during umount. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c2cf040d5284..1795ce231118 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -525,6 +525,7 @@ struct f2fs_sb_info { struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ bool need_fsck; /* need fsck.f2fs to fix */ + bool s_closing; /* specify unmounting */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0e97974bbbbd..84f95cd4076a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -487,7 +487,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (sync) { struct cp_control cpc; - cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; + cpc.reason = (test_opt(sbi, FASTBOOT) || sbi->s_closing) ? + CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -1190,11 +1191,18 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); } +static void kill_f2fs_super(struct super_block *sb) +{ + if (sb->s_root) + F2FS_SB(sb)->s_closing = true; + kill_block_super(sb); +} + static struct file_system_type f2fs_fs_type = { .owner = THIS_MODULE, .name = "f2fs", .mount = f2fs_mount, - .kill_sb = kill_block_super, + .kill_sb = kill_f2fs_super, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("f2fs"); From 85dc2f2c6c84e99e9864ef660f79683aaad85f42 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Jan 2015 17:41:41 -0800 Subject: [PATCH 37/64] f2fs: do checkpoint when umount flag is not set If the previous checkpoint was done without CP_UMOUNT flag, it needs to do checkpoint with CP_UMOUNT for the next fast boot. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 ++- fs/f2fs/super.c | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9f5317c9ad72..231d8c9fea0a 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1043,7 +1043,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); - if (!sbi->s_dirty && cpc->reason != CP_DISCARD) + if (!sbi->s_dirty && + cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 84f95cd4076a..0d627f2d1828 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -447,8 +447,13 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_stats(sbi); stop_gc_thread(sbi); - /* We don't need to do checkpoint when it's clean */ - if (sbi->s_dirty) { + /* + * We don't need to do checkpoint when superblock is clean. + * But, the previous checkpoint was not done by umount, it needs to do + * clean checkpoint again. + */ + if (sbi->s_dirty || + !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { struct cp_control cpc = { .reason = CP_UMOUNT, }; From 1601839e9e5bd5726d744c9c5919f87dc808bbcc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 19 Jan 2015 20:24:37 +0800 Subject: [PATCH 38/64] f2fs: fix to release count of meta page in ->invalidatepage We will encounter deadloop in below scenario: 1. increase page count for F2FS_DIRTY_META type in following path: ->recover_fsync_data ->recover_data ->do_recover_data ->recover_data_page ->change_curseg ->write_sum_page ->set_page_dirty 2. fail in recover_data() 3. invalidate meta pages in truncate_inode_pages_final without decreasing page count. 4. deadloop when sync_meta_pages as page count will always be non-zero. message: NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [] pagevec_lookup_tag+0x27/0x30 [] sync_meta_pages+0x87/0x160 [f2fs] [] recover_fsync_data+0xeb9/0xf10 [f2fs] [] f2fs_fill_super+0x888/0x980 [f2fs] [] mount_bdev+0x16a/0x1a0 [] f2fs_mount+0x1f/0x30 [f2fs] [] mount_fs+0x36/0x170 [] vfs_kern_mount+0x55/0xe0 [] do_mount+0x1df/0x9f0 [] SyS_mount+0x70/0xb0 [] sysenter_do_call+0x12/0x12 To avoid page count leak, let's add ->invalidatepage and ->releasepage in f2fs_meta_aops as f2fs_node_aops to release meta page count correctly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 231d8c9fea0a..79f82819086f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -302,16 +302,35 @@ static int f2fs_set_meta_page_dirty(struct page *page) if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); + SetPagePrivate(page); f2fs_trace_pid(page); return 1; } return 0; } +static void f2fs_invalidate_meta_page(struct page *page, unsigned int offset, + unsigned int length) +{ + struct inode *inode = page->mapping->host; + + if (PageDirty(page)) + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_META); + ClearPagePrivate(page); +} + +static int f2fs_release_meta_page(struct page *page, gfp_t wait) +{ + ClearPagePrivate(page); + return 1; +} + const struct address_space_operations f2fs_meta_aops = { .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .set_page_dirty = f2fs_set_meta_page_dirty, + .invalidatepage = f2fs_invalidate_meta_page, + .releasepage = f2fs_release_meta_page, }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) From bc4a1f873b9db010f5b0971ee5f2987d9be32c36 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 22 Jan 2015 14:48:28 -0800 Subject: [PATCH 39/64] f2fs: leave comment for code readability During the recovery, any xattr blocks should not be found, since they are written into cold log, not the warm node chain. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c4211a5862df..57603a7127f7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -346,6 +346,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (IS_INODE(page)) { recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { + /* + * Deprecated; xattr blocks should be found from cold log. + * But, we should remain this for backward compatibility. + */ recover_xattr_data(inode, page, blkaddr); goto out; } From d49f3e890290bd1db047d02335401026d1886472 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Jan 2015 20:36:04 +0800 Subject: [PATCH 40/64] f2fs: add F2FS_IOC_GETVERSION support In this patch we add the FS_IOC_GETVERSION ioctl for getting i_generation from inode, after that, users can list file's generation number by using "lsattr -v". Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1795ce231118..5e16085d98d3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -202,6 +202,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, */ #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS +#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION #define F2FS_IOCTL_MAGIC 0xf5 #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 710adc987937..ec17d05d6553 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -926,6 +926,13 @@ out: return ret; } +static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + + return put_user(inode->i_generation, (int __user *)arg); +} + static int f2fs_ioc_start_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); @@ -1061,6 +1068,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_getflags(filp, arg); case F2FS_IOC_SETFLAGS: return f2fs_ioc_setflags(filp, arg); + case F2FS_IOC_GETVERSION: + return f2fs_ioc_getversion(filp, arg); case F2FS_IOC_START_ATOMIC_WRITE: return f2fs_ioc_start_atomic_write(filp); case F2FS_IOC_COMMIT_ATOMIC_WRITE: From f28e503429644a794d9bf2793189ff0b0887eb21 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Jan 2015 20:37:53 +0800 Subject: [PATCH 41/64] f2fs: use f2fs_radix_tree_insert to clean codes No modification in functionality, just clean codes with f2fs_radix_tree_insert. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 40887d3c9d01..67860b6712f3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -356,11 +356,8 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) } new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); new_ie->inode = inode; -retry: - if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { - cond_resched(); - goto retry; - } + + f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); list_add_tail(&new_ie->list, &gc_list->ilist); } From 3b6709b771b3335856643df8f6e0158db150a438 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 24 Jan 2015 17:06:25 +0800 Subject: [PATCH 42/64] f2fs: fix a bug of inheriting default ACL from parent Introduced by a6dda0e63e97122ce9e0ba04367e37cca28315fa "f2fs: use generic posix ACL infrastructure". When testing default acl, gets in recent kernel (3.19.0-rc5), user::rwx group::r-x other::r-x default:user::rwx default:group::r-x default:group:root:rwx default:mask::rwx default:other::r-x ]# getfacl testdir/ user::rwx group::rwx // missing an acl "group:root:rwx" inherited from parent other::r-x default:user::rwx default:group::r-x default:group:root:rwx default:mask::rwx default:other::r-x Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 7f12d28ad94c..742202779bd5 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -396,7 +396,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, posix_acl_release(default_acl); } if (acl) { - if (error) + if (!error) error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); From feeb0debfb3454726ebea3871cecac35e144d1bb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Jan 2015 20:22:55 +0800 Subject: [PATCH 43/64] f2fs: make truncate_inline_date static 1. make truncate_inline_date static; 2. remove parameter @from of truncate_inline_date as callers only pass zero. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/inline.c | 25 +++++++++---------------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5e16085d98d3..9e1fcc1f64ee 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1649,7 +1649,6 @@ int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); int f2fs_convert_inline_inode(struct inode *); int f2fs_write_inline_data(struct inode *, struct page *); -void truncate_inline_data(struct page *, u64); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index fa2aa2f7bb96..1484c00133cd 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -50,6 +50,12 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } +static void truncate_inline_data(struct page *ipage) +{ + f2fs_wait_on_page_writeback(ipage, NODE); + memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); +} + int f2fs_read_inline_data(struct inode *inode, struct page *page) { struct page *ipage; @@ -125,7 +131,7 @@ no_update: set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_data(dn->inode_page, 0); + truncate_inline_data(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); @@ -198,19 +204,6 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) return 0; } -void truncate_inline_data(struct page *ipage, u64 from) -{ - void *addr; - - if (from >= MAX_INLINE_DATA) - return; - - f2fs_wait_on_page_writeback(ipage, NODE); - - addr = inline_data_addr(ipage); - memset(addr + from, 0, MAX_INLINE_DATA - from); -} - bool recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -252,7 +245,7 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - truncate_inline_data(ipage, 0); + truncate_inline_data(ipage); f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); @@ -370,7 +363,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_data(ipage, 0); + truncate_inline_data(ipage); stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); From 88dd8934194f6d1db7f824c03d1eee169cb891b0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Jan 2015 20:24:21 +0800 Subject: [PATCH 44/64] f2fs: clean up {in,de}create_sleep_time Use pointer parameter @wait to pass result in {in,de}create_sleep_time for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 ++++---- fs/f2fs/gc.h | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 67860b6712f3..ba89e27f394f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -44,7 +44,7 @@ static int gc_thread_func(void *data) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); continue; } @@ -65,15 +65,15 @@ static int gc_thread_func(void *data) continue; if (!is_idle(sbi)) { - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); continue; } if (has_enough_invalid_blocks(sbi)) - wait_ms = decrease_sleep_time(gc_th, wait_ms); + decrease_sleep_time(gc_th, &wait_ms); else - wait_ms = increase_sleep_time(gc_th, wait_ms); + increase_sleep_time(gc_th, &wait_ms); stat_inc_bggc_count(sbi); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 524543a6a34a..d5ff97c5e394 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -66,26 +66,26 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } -static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th, + long *wait) { - if (wait == gc_th->no_gc_sleep_time) - return wait; + if (*wait == gc_th->no_gc_sleep_time) + return; - wait += gc_th->min_sleep_time; - if (wait > gc_th->max_sleep_time) - wait = gc_th->max_sleep_time; - return wait; + *wait += gc_th->min_sleep_time; + if (*wait > gc_th->max_sleep_time) + *wait = gc_th->max_sleep_time; } -static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) +static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, + long *wait) { - if (wait == gc_th->no_gc_sleep_time) - wait = gc_th->max_sleep_time; + if (*wait == gc_th->no_gc_sleep_time) + *wait = gc_th->max_sleep_time; - wait -= gc_th->min_sleep_time; - if (wait <= gc_th->min_sleep_time) - wait = gc_th->min_sleep_time; - return wait; + *wait -= gc_th->min_sleep_time; + if (*wait <= gc_th->min_sleep_time) + *wait = gc_th->min_sleep_time; } static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) From caf0047e7e1e60a7ad1d655d3b81b32e2dfb6095 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 Jan 2015 17:48:42 +0800 Subject: [PATCH 45/64] f2fs: merge flags in struct f2fs_sb_info Currently, there are several variables with Boolean type as below: struct f2fs_sb_info { ... int s_dirty; bool need_fsck; bool s_closing; ... bool por_doing; ... } For this there are some issues: 1. there are some space of f2fs_sb_info is wasted due to aligning after Boolean type variables by compiler. 2. if we continuously add new flag into f2fs_sb_info, structure will be messed up. So in this patch, we try to: 1. switch s_dirty to Boolean type variable since it has two status 0/1. 2. merge s_dirty/need_fsck/s_closing/por_doing variables into s_flag. 3. introduce an enum type which can indicate different states of sbi. 4. use new introduced universal interfaces is_sbi_flag_set/{set,clear}_sbi_flag to operate flags for sbi. After that, above issues will be fixed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 ++++++------ fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 30 ++++++++++++++++++++---------- fs/f2fs/node.c | 4 ++-- fs/f2fs/recovery.c | 4 ++-- fs/f2fs/segment.h | 10 +++++----- fs/f2fs/super.c | 15 ++++++++------- include/trace/events/f2fs.h | 4 ++-- 8 files changed, 46 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 79f82819086f..19021414d195 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -190,7 +190,7 @@ static int f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; @@ -485,7 +485,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return; - sbi->por_doing = true; + set_sbi_flag(sbi, SBI_POR_DOING); start_blk = __start_cp_addr(sbi) + 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); @@ -506,7 +506,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) } /* clear Orphan Flag */ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); return; } @@ -973,7 +973,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (sbi->need_fsck) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) set_ckpt_flags(ckpt, CP_FSCK_FLAG); /* update SIT/NAT bitmap */ @@ -1047,7 +1047,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return; clear_prefree_segments(sbi); - F2FS_RESET_SB_DIRT(sbi); + clear_sbi_flag(sbi, SBI_IS_DIRTY); } /* @@ -1062,7 +1062,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); - if (!sbi->s_dirty && + if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) goto out; if (unlikely(f2fs_cp_error(sbi))) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a7b905cb05f8..6d9e6e4ce439 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -818,7 +818,7 @@ static int f2fs_write_data_page(struct page *page, zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (f2fs_is_drop_cache(inode)) goto out; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e1fcc1f64ee..5abe0836c179 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -28,7 +28,7 @@ do { \ if (unlikely(condition)) { \ WARN_ON(1); \ - sbi->need_fsck = true; \ + set_sbi_flag(sbi, SBI_NEED_FSCK); \ } \ } while (0) #define f2fs_down_write(x, y) down_write(x) @@ -519,14 +519,20 @@ struct inode_management { unsigned long ino_num; /* number of entries */ }; +/* For s_flag in struct f2fs_sb_info */ +enum { + SBI_IS_DIRTY, /* dirty flag for checkpoint */ + SBI_IS_CLOSE, /* specify unmounting */ + SBI_NEED_FSCK, /* need fsck.f2fs to fix */ + SBI_POR_DOING, /* recovery is doing or not */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - int s_dirty; /* dirty flag for checkpoint */ - bool need_fsck; /* need fsck.f2fs to fix */ - bool s_closing; /* specify unmounting */ + int s_flag; /* flags for sbi */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ @@ -546,7 +552,6 @@ struct f2fs_sb_info { struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ - bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ @@ -699,14 +704,19 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) return sbi->node_inode->i_mapping; } -static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_dirty = 1; + return sbi->s_flag & (0x01 << type); } -static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_dirty = 0; + sbi->s_flag |= (0x01 << type); +} + +static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) +{ + sbi->s_flag &= ~(0x01 << type); } static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) @@ -818,7 +828,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { atomic_inc(&sbi->nr_pages[count_type]); - F2FS_SET_SB_DIRT(sbi); + set_sbi_flag(sbi, SBI_IS_DIRTY); } static inline void inode_inc_dirty_pages(struct inode *inode) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d7c143626705..05e6faa71cff 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -588,7 +588,7 @@ static void truncate_node(struct dnode_of_data *dn) } invalidate: clear_node_page_dirty(dn->node_page); - F2FS_SET_SB_DIRT(sbi); + set_sbi_flag(sbi, SBI_IS_DIRTY); f2fs_put_page(dn->node_page, 1); @@ -1284,7 +1284,7 @@ static int f2fs_write_node_page(struct page *page, trace_f2fs_writepage(page, NODE); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 57603a7127f7..41afb9534bbd 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -508,7 +508,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ - sbi->por_doing = true; + set_sbi_flag(sbi, SBI_POR_DOING); /* prevent checkpoint */ mutex_lock(&sbi->cp_mutex); @@ -541,7 +541,7 @@ out: truncate_inode_pages_final(META_MAPPING(sbi)); } - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); if (err) { discard_next_dnode(sbi, blkaddr); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7f327c0ba4e3..421d5794b0c8 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -460,7 +460,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - if (unlikely(sbi->por_doing)) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + @@ -599,13 +599,13 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) { if (segno > TOTAL_SEGS(sbi) - 1) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } /* @@ -616,11 +616,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, { /* check segment usage */ if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); /* check boundary of a given segment number */ if (segno > TOTAL_SEGS(sbi) - 1) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); } #endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0d627f2d1828..c3aa72f9c8c8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -452,7 +452,7 @@ static void f2fs_put_super(struct super_block *sb) * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. */ - if (sbi->s_dirty || + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { struct cp_control cpc = { .reason = CP_UMOUNT, @@ -492,8 +492,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (sync) { struct cp_control cpc; - cpc.reason = (test_opt(sbi, FASTBOOT) || sbi->s_closing) ? - CP_UMOUNT : CP_SYNC; + cpc.reason = (test_opt(sbi, FASTBOOT) || + is_sbi_flag_set(sbi, SBI_IS_CLOSE)) ? + CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -895,7 +896,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; - sbi->need_fsck = false; + clear_sbi_flag(sbi, SBI_NEED_FSCK); } /* @@ -1006,7 +1007,7 @@ try_onemore: mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); - sbi->por_doing = false; + clear_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); init_rwsem(&sbi->read_io.io_rwsem); @@ -1130,7 +1131,7 @@ try_onemore: goto free_proc; if (!retry) - sbi->need_fsck = true; + set_sbi_flag(sbi, SBI_NEED_FSCK); /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -1199,7 +1200,7 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags, static void kill_f2fs_super(struct super_block *sb) { if (sb->s_root) - F2FS_SB(sb)->s_closing = true; + set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE); kill_block_super(sb); } diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 13992f3c1445..5e1c0292250c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -184,13 +184,13 @@ TRACE_EVENT(f2fs_sync_fs, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, dirty) + __field(bool, dirty) __field(int, wait) ), TP_fast_assign( __entry->dev = sb->s_dev; - __entry->dirty = F2FS_SB(sb)->s_dirty; + __entry->dirty = is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY); __entry->wait = wait; ), From dabc4a5c60f796a8e7171272284ba077f9c1e439 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 17:41:39 -0800 Subject: [PATCH 46/64] f2fs: fix not to drop mount options when retrying fill_super If wrong mount option was requested, f2fs tries to fill_super again. But, during the next trial, f2fs has no valid mount options, since parse_options deleted all the separators in the original string. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c3aa72f9c8c8..146e310fbf04 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -951,6 +951,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct inode *root; long err = -EINVAL; bool retry = true; + char *options = NULL; int i; try_onemore: @@ -982,9 +983,15 @@ try_onemore: set_opt(sbi, POSIX_ACL); #endif /* parse mount options */ - err = parse_options(sb, (char *)data); - if (err) + options = kstrdup((const char *)data, GFP_KERNEL); + if (data && !options) { + err = -ENOMEM; goto free_sb_buf; + } + + err = parse_options(sb, options); + if (err) + goto free_options; sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); sb->s_max_links = F2FS_LINK_MAX; @@ -1028,7 +1035,7 @@ try_onemore: if (IS_ERR(sbi->meta_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_sb_buf; + goto free_options; } err = get_valid_checkpoint(sbi); @@ -1153,6 +1160,7 @@ try_onemore: if (err) goto free_kobj; } + kfree(options); return 0; free_kobj: @@ -1177,6 +1185,8 @@ free_cp: free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); +free_options: + kfree(options); free_sb_buf: brelse(raw_super_buf); free_sbi: From 2d834bf9ac8e40d9ae2a2dcde2348bd028f87ec4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 18:33:46 -0800 Subject: [PATCH 47/64] f2fs: support norecovery mount option This patch adds a mount option, norecovery, which is mostly same as disable_roll_forward. The only difference is that norecovery should be activated with read-only mount option. This can be used when user wants to check whether f2fs is mountable or not without any recovery process. (e.g., xfstests/200) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 2 ++ fs/f2fs/super.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index e0950c483c22..6758aa358475 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -106,6 +106,8 @@ background_gc=%s Turn on/off cleaning operations, namely garbage Default value for this option is on. So garbage collection is on by default. disable_roll_forward Disable the roll-forward recovery routine +norecovery Disable the roll-forward recovery routine, mounted read- + only (i.e., -o ro,disable_roll_forward) discard Issue discard/TRIM commands when a segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 146e310fbf04..06d903b5d6c8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -42,6 +42,7 @@ static struct kset *f2fs_kset; enum { Opt_gc_background, Opt_disable_roll_forward, + Opt_norecovery, Opt_discard, Opt_noheap, Opt_user_xattr, @@ -62,6 +63,7 @@ enum { static match_table_t f2fs_tokens = { {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, + {Opt_norecovery, "norecovery"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, {Opt_user_xattr, "user_xattr"}, @@ -287,6 +289,12 @@ static int parse_options(struct super_block *sb, char *options) case Opt_disable_roll_forward: set_opt(sbi, DISABLE_ROLL_FORWARD); break; + case Opt_norecovery: + /* this option mounts f2fs with ro */ + set_opt(sbi, DISABLE_ROLL_FORWARD); + if (!f2fs_readonly(sb)) + return -EINVAL; + break; case Opt_discard: set_opt(sbi, DISCARD); break; From 11504a8e7e20e2ff2e68176850e9b83b8bae760e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 18:43:45 -0800 Subject: [PATCH 48/64] f2fs: avoid write_checkpoint if f2fs is mounted readonly Do not change any partition when f2fs is changed to readonly mode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 19021414d195..22165fb1d0d1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1067,6 +1067,8 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) goto out; if (unlikely(f2fs_cp_error(sbi))) goto out; + if (f2fs_readonly(sbi->sb)) + goto out; if (block_operations(sbi)) goto out; From 119ee9144534141822462e3e8a5ccc8dc537f712 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 29 Jan 2015 11:45:33 -0800 Subject: [PATCH 49/64] f2fs: split UMOUNT and FASTBOOT flags This patch adds FASTBOOT flag into checkpoint as follows. - CP_UMOUNT_FLAG is set when system is umounted. - CP_FASTBOOT_FLAG is set when intermediate checkpoint having node summaries was done. So, if you get CP_UMOUNT_FLAG from checkpoint, the system was umounted cleanly. Instead, if there was sudden-power-off, you can get CP_FASTBOOT_FLAG or nothing. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 19 +++++++++++++------ fs/f2fs/f2fs.h | 23 +++++++++++++++++++++++ fs/f2fs/gc.c | 3 +-- fs/f2fs/segment.c | 11 +++++------ fs/f2fs/super.c | 5 ++--- include/linux/f2fs_fs.h | 1 + include/trace/events/f2fs.h | 1 + 7 files changed, 46 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 22165fb1d0d1..f7cdcad31943 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -956,17 +956,24 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); - if (cpc->reason == CP_UMOUNT) { - set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + if (__remain_node_summaries(cpc->reason)) ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); - } else { - clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); - } + + if (cpc->reason == CP_UMOUNT) + set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else + clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + + if (cpc->reason == CP_FASTBOOT) + set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + else + clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -1010,7 +1017,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) write_data_summaries(sbi, start_blk); start_blk += data_sum_blocks; - if (cpc->reason == CP_UMOUNT) { + if (__remain_node_summaries(cpc->reason)) { write_node_summaries(sbi, start_blk); start_blk += NR_CURSEG_NODE_TYPE; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5abe0836c179..8231a599a305 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -100,6 +100,7 @@ enum { enum { CP_UMOUNT, + CP_FASTBOOT, CP_SYNC, CP_DISCARD, }; @@ -764,6 +765,28 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) up_write(&sbi->cp_rwsem); } +static inline int __get_cp_reason(struct f2fs_sb_info *sbi) +{ + int reason = CP_SYNC; + + if (test_opt(sbi, FASTBOOT)) + reason = CP_FASTBOOT; + if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) + reason = CP_UMOUNT; + return reason; +} + +static inline bool __remain_node_summaries(int reason) +{ + return (reason == CP_UMOUNT || reason == CP_FASTBOOT); +} + +static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) +{ + return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) || + is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG)); +} + /* * Check whether the given nid is within node id range. */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ba89e27f394f..76adbc3641f1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -698,8 +698,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) .iroot = RADIX_TREE_INIT(GFP_NOFS), }; - cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; - + cpc.reason = __get_cp_reason(sbi); gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 31c4e5702c7d..5ea57ec153d1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1401,7 +1401,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) segno = le32_to_cpu(ckpt->cur_data_segno[type]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - CURSEG_HOT_DATA]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); else blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); @@ -1410,7 +1410,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) CURSEG_HOT_NODE]); blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - CURSEG_HOT_NODE]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, type - CURSEG_HOT_NODE); else @@ -1421,7 +1421,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { @@ -1470,7 +1470,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) type = CURSEG_HOT_NODE; } - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), NR_CURSEG_TYPE - type, META_CP); @@ -1567,8 +1567,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) - write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 06d903b5d6c8..bfeab3c81a48 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -500,9 +500,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (sync) { struct cp_control cpc; - cpc.reason = (test_opt(sbi, FASTBOOT) || - is_sbi_flag_set(sbi, SBI_IS_CLOSE)) ? - CP_UMOUNT : CP_SYNC; + cpc.reason = __get_cp_reason(sbi); + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index e993b0bc9abf..09805720f4bf 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -87,6 +87,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 #define CP_COMPACT_SUM_FLAG 0x00000004 diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 5e1c0292250c..69629826c2ba 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -72,6 +72,7 @@ #define show_cpreason(type) \ __print_symbolic(type, \ { CP_UMOUNT, "Umount" }, \ + { CP_FASTBOOT, "Fastboot" }, \ { CP_SYNC, "Sync" }, \ { CP_DISCARD, "Discard" }) From 081d78c2fc4ab2fef4cdf1100dd22155c73f8657 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Jan 2015 19:16:59 -0800 Subject: [PATCH 50/64] f2fs: should fail mount when trying to recover data on read-only dev If device is read-only, we should not proceed data recovery. But, if the previous checkpoint was done by normal clean shutdown, it's safe to proceed the recovery, since there will be no data to be recovered. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bfeab3c81a48..1e92c2ea6bc1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1149,6 +1149,15 @@ try_onemore: /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { + /* + * mount should be failed, when device has readonly mode, and + * previous checkpoint was not done by clean system shutdown. + */ + if (bdev_read_only(sb->s_bdev) && + !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) { + err = -EROFS; + goto free_kobj; + } err = recover_fsync_data(sbi); if (err) { f2fs_msg(sb, KERN_ERR, From f68daeebba5a697f31f64c07b8693fa678981819 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 Jan 2015 11:39:08 -0800 Subject: [PATCH 51/64] f2fs: keep PagePrivate during releasepage If PagePrivate is removed by releasepage, f2fs loses counting dirty pages. e.g., try_to_release_page will not release page when the page is dirty, but our releasepage removes PagePrivate. [] try_to_release_page+0x35/0x50 [] invalidate_inode_pages2_range+0x2f9/0x3b0 [] ? truncate_blocks+0x384/0x4d0 [f2fs] [] ? f2fs_direct_IO+0x283/0x290 [f2fs] [] ? get_data_block_fiemap+0x20/0x20 [f2fs] [] generic_file_direct_write+0x163/0x170 [] __generic_file_write_iter+0x2a6/0x350 [] generic_file_write_iter+0x3f/0xb0 [] new_sync_write+0x81/0xb0 [] vfs_write+0xb7/0x1f0 [] SyS_write+0x49/0xb0 [] system_call_fastpath+0x16/0x1b Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++++ fs/f2fs/data.c | 4 ++++ fs/f2fs/node.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f7cdcad31943..470fa58606b2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -321,6 +321,10 @@ static void f2fs_invalidate_meta_page(struct page *page, unsigned int offset, static int f2fs_release_meta_page(struct page *page, gfp_t wait) { + /* If this is dirty page, keep PagePrivate */ + if (PageDirty(page)) + return 0; + ClearPagePrivate(page); return 1; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6d9e6e4ce439..27dc3fd9fb60 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1130,6 +1130,10 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, static int f2fs_release_data_page(struct page *page, gfp_t wait) { + /* If this is dirty page, keep PagePrivate */ + if (PageDirty(page)) + return 0; + ClearPagePrivate(page); return 1; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 05e6faa71cff..9feda386ed80 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1381,6 +1381,10 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, static int f2fs_release_node_page(struct page *page, gfp_t wait) { + /* If this is dirty page, keep PagePrivate */ + if (PageDirty(page)) + return 0; + ClearPagePrivate(page); return 1; } From d24bdcbfc694026b5cc283cddf47e38f5a7b685d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 Jan 2015 16:43:11 -0800 Subject: [PATCH 52/64] f2fs: show the number of writeback pages in stat This patch adds the # of writeback pages in stat info. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/f2fs.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0f721f6a1147..ac2bd8e7eca9 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -40,6 +40,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); + si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -266,8 +267,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - inmem: %4d\n", - si->inmem_pages); + seq_printf(s, " - inmem: %4d, wb: %4d\n", + si->inmem_pages, si->wb_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8231a599a305..964c240c287f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1549,7 +1549,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; - int bg_gc, inline_inode, inline_dir, inmem_pages; + int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; From 487261f39bcd8983f55c611e299f70f34659674b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 5 Feb 2015 17:44:29 +0800 Subject: [PATCH 53/64] f2fs: merge {invalidate,release}page for meta/node/data pages This patch merges ->{invalidate,release}page function for meta/node/data pages. After this, duplication of codes could be removed. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 24 ++---------------------- fs/f2fs/data.c | 24 ++++++++++++++++-------- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/node.c | 23 ++--------------------- 4 files changed, 22 insertions(+), 51 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 470fa58606b2..31a715b5fd5c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -309,32 +309,12 @@ static int f2fs_set_meta_page_dirty(struct page *page) return 0; } -static void f2fs_invalidate_meta_page(struct page *page, unsigned int offset, - unsigned int length) -{ - struct inode *inode = page->mapping->host; - - if (PageDirty(page)) - dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_META); - ClearPagePrivate(page); -} - -static int f2fs_release_meta_page(struct page *page, gfp_t wait) -{ - /* If this is dirty page, keep PagePrivate */ - if (PageDirty(page)) - return 0; - - ClearPagePrivate(page); - return 1; -} - const struct address_space_operations f2fs_meta_aops = { .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .set_page_dirty = f2fs_set_meta_page_dirty, - .invalidatepage = f2fs_invalidate_meta_page, - .releasepage = f2fs_release_meta_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 27dc3fd9fb60..d166557ca867 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1115,20 +1115,28 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } -static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, - unsigned int length) +void f2fs_invalidate_page(struct page *page, unsigned int offset, + unsigned int length) { struct inode *inode = page->mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) + if (inode->i_ino >= F2FS_ROOT_INO(sbi) && + (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)) return; - if (PageDirty(page)) - inode_dec_dirty_pages(inode); + if (PageDirty(page)) { + if (inode->i_ino == F2FS_META_INO(sbi)) + dec_page_count(sbi, F2FS_DIRTY_META); + else if (inode->i_ino == F2FS_NODE_INO(sbi)) + dec_page_count(sbi, F2FS_DIRTY_NODES); + else + inode_dec_dirty_pages(inode); + } ClearPagePrivate(page); } -static int f2fs_release_data_page(struct page *page, gfp_t wait) +int f2fs_release_page(struct page *page, gfp_t wait) { /* If this is dirty page, keep PagePrivate */ if (PageDirty(page)) @@ -1183,8 +1191,8 @@ const struct address_space_operations f2fs_dblock_aops = { .write_begin = f2fs_write_begin, .write_end = f2fs_write_end, .set_page_dirty = f2fs_set_data_page_dirty, - .invalidatepage = f2fs_invalidate_data_page, - .releasepage = f2fs_release_data_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, }; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 964c240c287f..db1ff55de0bc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1520,6 +1520,8 @@ struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct page *, struct f2fs_io_info *); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); +void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); +int f2fs_release_page(struct page *, gfp_t); /* * gc.c diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9feda386ed80..ea22c3210c8a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1370,25 +1370,6 @@ static int f2fs_set_node_page_dirty(struct page *page) return 0; } -static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, - unsigned int length) -{ - struct inode *inode = page->mapping->host; - if (PageDirty(page)) - dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES); - ClearPagePrivate(page); -} - -static int f2fs_release_node_page(struct page *page, gfp_t wait) -{ - /* If this is dirty page, keep PagePrivate */ - if (PageDirty(page)) - return 0; - - ClearPagePrivate(page); - return 1; -} - /* * Structure of the f2fs node operations */ @@ -1396,8 +1377,8 @@ const struct address_space_operations f2fs_node_aops = { .writepage = f2fs_write_node_page, .writepages = f2fs_write_node_pages, .set_page_dirty = f2fs_set_node_page_dirty, - .invalidatepage = f2fs_invalidate_node_page, - .releasepage = f2fs_release_node_page, + .invalidatepage = f2fs_invalidate_page, + .releasepage = f2fs_release_page, }; static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, From bba681cbb231920a786cd7303462fb2632af6f36 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 26 Jan 2015 17:41:23 -0800 Subject: [PATCH 54/64] f2fs: introduce a batched trim This patch introduces a batched trimming feature, which submits split discard commands. This is to avoid long latency due to huge trim commands. If fstrim was triggered ranging from 0 to the end of device, we should lock all the checkpoint-related mutexes, resulting in very long latency. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ Documentation/filesystems/f2fs.txt | 4 ++++ fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/segment.c | 17 ++++++++++++----- fs/f2fs/super.c | 2 ++ 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 6f9157f16725..2c4cc42006e8 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -74,3 +74,9 @@ Date: March 2014 Contact: "Jaegeuk Kim" Description: Controls the memory footprint used by f2fs. + +What: /sys/fs/f2fs//trim_sections +Date: February 2015 +Contact: "Jaegeuk Kim" +Description: + Controls the trimming rate in batch mode. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 6758aa358475..dac11d7fef27 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -199,6 +199,10 @@ Files in /sys/fs/f2fs/ checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. + trim_sections This parameter controls the number of sections + to be trimmed out in batch mode when FITRIM + conducts. 32 sections is set by default. + ipu_policy This parameter controls the policy of in-place updates in f2fs. There are five policies: 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index db1ff55de0bc..337204dfc5cd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -105,6 +105,10 @@ enum { CP_DISCARD, }; +#define DEF_BATCHED_TRIM_SECTIONS 32 +#define BATCHED_TRIM_SEGMENTS(sbi) \ + (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) + struct cp_control { int reason; __u64 trim_start; @@ -448,6 +452,9 @@ struct f2fs_sm_info { int nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ + /* for batched trimming */ + unsigned int trim_sections; /* # of sections to trim */ + struct list_head sit_entry_set; /* sit entry set list */ unsigned int ipu_policy; /* in-place-update policy */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5ea57ec153d1..9f278d156d88 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1066,14 +1066,19 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); cpc.reason = CP_DISCARD; - cpc.trim_start = start_segno; - cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; /* do checkpoint to issue discard commands safely */ - mutex_lock(&sbi->gc_mutex); - write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { + cpc.trim_start = start_segno; + cpc.trim_end = min_t(unsigned int, rounddown(start_segno + + BATCHED_TRIM_SEGMENTS(sbi), + sbi->segs_per_sec) - 1, end_segno); + + mutex_lock(&sbi->gc_mutex); + write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); + } out: range->len = cpc.trimmed << sbi->log_blocksize; return 0; @@ -2127,6 +2132,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->nr_discards = 0; sm_info->max_discards = 0; + sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; + INIT_LIST_HEAD(&sm_info->sit_entry_set); if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1e92c2ea6bc1..f2fe666a6ea9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -195,6 +195,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); @@ -210,6 +211,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), + ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), From 560d4672e2f9f15aee2a9de1f1e70ef4c50f95d8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Feb 2015 17:36:15 +0800 Subject: [PATCH 55/64] f2fs: fix to use highmem for pages of newly created directory In commit a78186ebe516 ("f2fs: use highmem for directory pages"), we have set __GFP_HIGHMEM into dir mapping's gfp flag in f2fs_iget, so high address memory could be used for these existing dir's page. But we forgot to set flag for newly created dir, due to this reason, our newly created dir pages could not be allocated from high address memory. Fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 547a2deeb1ac..e79639a9787a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -299,7 +299,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); set_inode_flag(F2FS_I(inode), FI_INC_LINK); f2fs_lock_op(sbi); From 2e023174a88dd14eab30fae3a1f6c97f37eb3bb8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 Feb 2015 11:23:58 +0800 Subject: [PATCH 56/64] f2fs: avoid data offset overflow when lseeking huge file xfstest generic/285 complains our issue in lseeking huge file. Here is the detail output of generic/285: "./check -f2fs tests/generic/285 Ran: generic/285 Failures: generic/285 Failed 1 of 1 tests 10. Test a huge file for offset overflow 10.01 SEEK_HOLE expected 65536 or 8589934592, got 65536. succ 10.02 SEEK_HOLE expected 65536 or 8589934592, got 65536. succ 10.03 SEEK_DATA expected 0 or 0, got 0. succ 10.04 SEEK_DATA expected 1 or 1, got 1. succ 10.05 SEEK_HOLE expected 8589934592 or 8589934592, got 0. FAIL 10.06 SEEK_DATA expected 8589869056 or 8589869056, got 8589869056. succ 10.07 SEEK_DATA expected 8589869057 or 8589869057, got 8589869057. succ 10.08 SEEK_DATA expected 8589869056 or 8589869056, got 4294901760. FAIL" The reason of this issue is: We will calculate current offset through left shifting page-offset with PAGE_CACHE_SHIFT bits, but our page-offset is a type of unsigned long, its size is 4 bytes in 32-bits machine. So if our page-offset is bigger than (1 << 32 / pagesize - 1), result of left shifting will overflow. Let's fix this issue by casting type of page-offset to type of current offset: loff_t. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ec17d05d6553..7188a2ac64b5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -357,7 +357,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; dn.ofs_in_node++, pgofs++, - data_ofs = pgofs << PAGE_CACHE_SHIFT) { + data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) { block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); From aaf9607516ed38825268515ef4d773289a44f429 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Feb 2015 18:53:45 -0800 Subject: [PATCH 57/64] f2fs: check node page contents all the time In get_node_page, if the page is up-to-date, we assumed that the page was not reclaimed at all. But, sometimes it was reported that its contents was missing. So, just for sure, let's check its mapping and contents. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ea22c3210c8a..1e354ff9f7d8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1036,11 +1036,11 @@ repeat: err = read_node_page(page, READ_SYNC); if (err < 0) return ERR_PTR(err); - else if (err == LOCKED_PAGE) - goto got_it; + else if (err != LOCKED_PAGE) + lock_page(page); - lock_page(page); if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { + ClearPageUptodate(page); f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -1048,7 +1048,6 @@ repeat: f2fs_put_page(page, 1); goto repeat; } -got_it: return page; } From da17eece035d72cb50d48529744a490784f29d2f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Feb 2015 10:34:38 -0800 Subject: [PATCH 58/64] f2fs: call set_buffer_new for get_block This patch fixes wrong handling of buffer_new flag in get_block. If f2fs allocates new blocks and mapped buffer_head, it needs to set buffer_new for the bh_result. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d166557ca867..26e247697e58 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -274,7 +274,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, unsigned int blkbits = inode->i_sb->s_blocksize_bits; size_t count; - clear_buffer_new(bh_result); + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, start_blkaddr + pgofs - start_fofs); count = end_fofs - pgofs + 1; @@ -634,12 +634,14 @@ static int __get_data_block(struct inode *inode, sector_t iblock, goto put_out; if (dn.data_blkaddr != NULL_ADDR) { + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else if (create) { err = __allocate_data_block(&dn); if (err) goto put_out; allocated = true; + set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, dn.data_blkaddr); } else { goto put_out; From f7ef9b83b583640111039b30e13263b71c3a6ed5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Feb 2015 12:02:44 -0800 Subject: [PATCH 59/64] f2fs: introduce macros to convert bytes and blocks in f2fs This patch adds two macros for transition between byte and block offsets. Currently, f2fs only supports 4KB blocks, so use the default size for now. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++---- fs/f2fs/file.c | 3 +-- fs/f2fs/segment.c | 8 ++++---- include/linux/f2fs_fs.h | 4 ++++ 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 31a715b5fd5c..58d88df0d632 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -981,15 +981,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* write out checkpoint buffer at block 0 */ cp_page = grab_meta_page(sbi, start_blk++); kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + memcpy(kaddr, ckpt, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); for (i = 1; i < 1 + cp_payload_blks; i++) { cp_page = grab_meta_page(sbi, start_blk++); kaddr = page_address(cp_page); - memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, - (1 << sbi->log_blocksize)); + memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); } @@ -1009,7 +1008,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* writeout checkpoint block */ cp_page = grab_meta_page(sbi, start_blk); kaddr = page_address(cp_page); - memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); + memcpy(kaddr, ckpt, F2FS_BLKSIZE); set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7188a2ac64b5..f3b007540a48 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -491,8 +491,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); if (lock) f2fs_lock_op(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9f278d156d88..877a272a8146 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1048,8 +1048,8 @@ static const struct segment_allocation default_salloc_ops = { int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { - __u64 start = range->start >> sbi->log_blocksize; - __u64 end = start + (range->len >> sbi->log_blocksize) - 1; + __u64 start = F2FS_BYTES_TO_BLK(range->start); + __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; unsigned int start_segno, end_segno; struct cp_control cpc; @@ -1066,7 +1066,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); cpc.reason = CP_DISCARD; - cpc.trim_minlen = range->minlen >> sbi->log_blocksize; + cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen); /* do checkpoint to issue discard commands safely */ for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { @@ -1080,7 +1080,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) mutex_unlock(&sbi->gc_mutex); } out: - range->len = cpc.trimmed << sbi->log_blocksize; + range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return 0; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 09805720f4bf..a23556c32703 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,12 +19,16 @@ #define F2FS_MAX_LOG_SECTOR_SIZE 12 /* 12 bits for 4096 bytes */ #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* log number for sector/blk */ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ +#define F2FS_BLKSIZE_BITS 12 /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ +#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) +#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) + /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 From 59b802e5a453d413e7222d179be405cbadca3a8e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 9 Feb 2015 12:09:53 -0800 Subject: [PATCH 60/64] f2fs: allocate data blocks in advance for f2fs_direct_IO This patch adds preallocation for data blocks to prepare f2fs_direct_IO. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 26e247697e58..985ed023a750 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -592,6 +592,56 @@ static int __allocate_data_block(struct dnode_of_data *dn) return 0; } +static void __allocate_data_blocks(struct inode *inode, loff_t offset, + size_t count) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + u64 start = F2FS_BYTES_TO_BLK(offset); + u64 len = F2FS_BYTES_TO_BLK(count); + bool allocated; + u64 end_offset; + + while (len) { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); + + /* When reading holes, we need its node page */ + set_new_dnode(&dn, inode, NULL, NULL, 0); + if (get_dnode_of_data(&dn, start, ALLOC_NODE)) + goto out; + + allocated = false; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + + while (dn.ofs_in_node < end_offset && len) { + if (dn.data_blkaddr == NULL_ADDR) { + if (__allocate_data_block(&dn)) + goto sync_out; + allocated = true; + } + len--; + start++; + dn.ofs_in_node++; + } + + if (allocated) + sync_inode_page(&dn); + + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + } + return; + +sync_out: + if (allocated) + sync_inode_page(&dn); + f2fs_put_dnode(&dn); +out: + f2fs_unlock_op(sbi); + return; +} + /* * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. * If original data blocks are allocated, then give them to blockdev. @@ -617,10 +667,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (check_extent_cache(inode, pgofs, bh_result)) goto out; - if (create) { - f2fs_balance_fs(F2FS_I_SB(inode)); + if (create) f2fs_lock_op(F2FS_I_SB(inode)); - } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -1108,6 +1156,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, trace_f2fs_direct_IO_enter(inode, offset, count, rw); + if (rw & WRITE) + __allocate_data_blocks(inode, offset, count); + err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); if (err < 0 && (rw & WRITE)) f2fs_write_failed(mapping, offset + count); From 29e7043f405c4c4c3a82f61222790f3ea8c0bf13 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 10 Feb 2015 16:23:12 -0800 Subject: [PATCH 61/64] f2fs: fix sparse warnings This patch resolves the following warnings. include/trace/events/f2fs.h:150:1: warning: expression using sizeof bool include/trace/events/f2fs.h:180:1: warning: expression using sizeof bool include/trace/events/f2fs.h:990:1: warning: expression using sizeof bool include/trace/events/f2fs.h:990:1: warning: expression using sizeof bool include/trace/events/f2fs.h:150:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:180:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:990:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) include/trace/events/f2fs.h:990:1: warning: odd constant _Bool cast (ffffffffffffffff becomes 1) fs/f2fs/checkpoint.c:27:19: warning: symbol 'inode_entry_slab' was not declared. Should it be static? fs/f2fs/checkpoint.c:577:15: warning: cast to restricted __le32 fs/f2fs/checkpoint.c:592:15: warning: cast to restricted __le32 fs/f2fs/trace.c:19:1: warning: symbol 'pids' was not declared. Should it be static? fs/f2fs/trace.c:21:21: warning: symbol 'last_io' was not declared. Should it be static? Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.h | 2 -- fs/f2fs/trace.c | 4 ++-- include/trace/events/f2fs.h | 13 +++++++------ 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 58d88df0d632..7f794b72b3b7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -574,7 +574,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp1; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; @@ -589,7 +589,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp2; - crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); + crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 337204dfc5cd..7fa3313ab0e2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1681,6 +1681,7 @@ extern const struct address_space_operations f2fs_meta_aops; extern const struct inode_operations f2fs_dir_inode_operations; extern const struct inode_operations f2fs_symlink_inode_operations; extern const struct inode_operations f2fs_special_inode_operations; +extern struct kmem_cache *inode_entry_slab; /* * inline.c diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index d5ff97c5e394..b4a65be9f7d3 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -40,8 +40,6 @@ struct gc_inode_list { struct radix_tree_root iroot; }; -extern struct kmem_cache *inode_entry_slab; - /* * inline functions */ diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index ce01a2c903bd..875aa8179bc1 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -16,9 +16,9 @@ #include "f2fs.h" #include "trace.h" -RADIX_TREE(pids, GFP_ATOMIC); +static RADIX_TREE(pids, GFP_ATOMIC); static spinlock_t pids_lock; -struct last_io_info last_io; +static struct last_io_info last_io; static inline void __print_last_io(void) { diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 69629826c2ba..5422dbfaf97d 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -149,14 +149,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, TRACE_EVENT(f2fs_sync_file_exit, - TP_PROTO(struct inode *inode, bool need_cp, int datasync, int ret), + TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret), TP_ARGS(inode, need_cp, datasync, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(bool, need_cp) + __field(int, need_cp) __field(int, datasync) __field(int, ret) ), @@ -185,7 +185,7 @@ TRACE_EVENT(f2fs_sync_fs, TP_STRUCT__entry( __field(dev_t, dev) - __field(bool, dirty) + __field(int, dirty) __field(int, wait) ), @@ -989,14 +989,15 @@ TRACE_EVENT(f2fs_issue_discard, TRACE_EVENT(f2fs_issue_flush, - TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge), + TP_PROTO(struct super_block *sb, unsigned int nobarrier, + unsigned int flush_merge), TP_ARGS(sb, nobarrier, flush_merge), TP_STRUCT__entry( __field(dev_t, dev) - __field(bool, nobarrier) - __field(bool, flush_merge) + __field(unsigned int, nobarrier) + __field(unsigned int, flush_merge) ), TP_fast_assign( From 60a3b782b1aaf6e5f8c4f92e99302c48a26d475b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 10 Feb 2015 16:44:29 -0800 Subject: [PATCH 62/64] f2fs: avoid variable length array Instead of using variable length array, this patch let preallocate memory for them. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 1 + fs/f2fs/segment.c | 10 ++++++++-- fs/f2fs/segment.h | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ac2bd8e7eca9..e671373cc8ab 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -142,6 +142,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); si->base_mem += __bitmap_size(sbi, SIT_BITMAP); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 877a272a8146..c9d314f44568 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -505,7 +505,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; - unsigned long dmap[entries]; + unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); int i; @@ -924,7 +924,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi, { struct seg_entry *se = get_seg_entry(sbi, seg->segno); int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - unsigned long target_map[entries]; + unsigned long *target_map = SIT_I(sbi)->tmp_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *cur_map = (unsigned long *)se->cur_valid_map; int i, pos; @@ -1855,6 +1855,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; } + sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->tmp_map) + return -ENOMEM; + if (sbi->segs_per_sec > 1) { sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry)); @@ -2236,6 +2240,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) kfree(sit_i->sentries[start].ckpt_valid_map); } } + kfree(sit_i->tmp_map); + vfree(sit_i->sentries); vfree(sit_i->sec_entries); kfree(sit_i->dirty_sentries_bitmap); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 421d5794b0c8..ba858ca01445 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -189,6 +189,7 @@ struct sit_info { char *sit_bitmap; /* SIT bitmap pointer */ unsigned int bitmap_size; /* SIT bitmap size */ + unsigned long *tmp_map; /* bitmap for temporal use */ unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ From f1a3b98e73a9f811ab4882669043c50c0e0dc7b6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 11 Feb 2015 11:25:11 -0800 Subject: [PATCH 63/64] f2fs: fix accessing wrong indexed data blocks This patch fixes the following test. This causes: attempt to access beyond end of device sdb2: rw=16384, want=14413962000, limit=16777216 The reason is: - f2fs_write_begin - f2fs_convert_inline_inode returns -ENOSPC - f2fs_write_failed - truncate_blocks - truncate_partial_data_page - find_data_page - get_dnode_of_data returns wrong data index retrieved from inline_data - f2fs_submit_page_bio(wrong data index) - submit_bio(wrong data index) Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1e354ff9f7d8..97bd9d3db882 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -474,7 +474,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct page *npage[4]; - struct page *parent; + struct page *parent = NULL; int offset[4]; unsigned int noffset[4]; nid_t nids[4]; @@ -491,6 +491,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (IS_ERR(npage[0])) return PTR_ERR(npage[0]); } + + /* if inline_data is set, should not report any block indices */ + if (f2fs_has_inline_data(dn->inode) && index) { + err = -EINVAL; + f2fs_put_page(npage[0], 1); + goto release_out; + } + parent = npage[0]; if (level != 0) nids[1] = get_nid(parent, offset[0], true); From 1a118ccfd60fc78e64c0a3ab9e85075545839d6e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 11 Feb 2015 18:20:38 +0800 Subject: [PATCH 64/64] f2fs: use spinlock for segmap_lock instead of rwlock rwlock can provide better concurrency when there are much more readers than writers because readers can hold the rwlock simultaneously. But now, for segmap_lock rwlock in struct free_segmap_info, there is only one reader 'mount' from below call path: ->f2fs_fill_super ->build_segment_manager ->build_dirty_segmap ->init_dirty_segmap ->find_next_inuse read_lock ... read_unlock Now that our concurrency can not be improved since there is no other reader for this lock, we do not need to use rwlock_t type for segmap_lock, let's replace it with spinlock_t type. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++--- fs/f2fs/segment.h | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c9d314f44568..daee4ab913da 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -800,7 +800,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, int go_left = 0; int i; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, @@ -873,7 +873,7 @@ got_it: f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) @@ -1923,7 +1923,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); free_i->free_segments = 0; free_i->free_sections = 0; - rwlock_init(&free_i->segmap_lock); + spin_lock_init(&free_i->segmap_lock); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ba858ca01445..7fd35111cf62 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -208,7 +208,7 @@ struct free_segmap_info { unsigned int start_segno; /* start segment number logically */ unsigned int free_segments; /* # of free segments */ unsigned int free_sections; /* # of free sections */ - rwlock_t segmap_lock; /* free segmap lock */ + spinlock_t segmap_lock; /* free segmap lock */ unsigned long *free_segmap; /* free segment bitmap */ unsigned long *free_secmap; /* free section bitmap */ }; @@ -319,9 +319,9 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, unsigned int max, unsigned int segno) { unsigned int ret; - read_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); ret = find_next_bit(free_i->free_segmap, max, segno); - read_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); return ret; } @@ -332,7 +332,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); free_i->free_segments++; @@ -341,7 +341,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_inuse(struct f2fs_sb_info *sbi, @@ -363,7 +363,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { free_i->free_segments++; @@ -374,7 +374,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, free_i->free_sections++; } } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, @@ -382,13 +382,13 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = segno / sbi->segs_per_sec; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) free_i->free_sections--; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,