From 292c196a3695e57980dc79dea3863462fa4831c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 16 Nov 2017 16:59:14 +0800 Subject: [PATCH 01/72] f2fs: reserve nid resource for quota sysfile During mkfs, quota sysfiles have already occupied nid resource, it needs to adjust remaining available nid count in kernel side. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 9 +-------- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/node.c | 2 +- fs/f2fs/super.c | 10 +++++++++- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ecada8425268..4d929627e210 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -49,14 +49,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; - - si->nquota_files = 0; - if (f2fs_sb_has_quota_ino(sbi->sb)) { - for (i = 0; i < MAXQUOTAS; i++) { - if (f2fs_qf_ino(sbi->sb, i)) - si->nquota_files++; - } - } + si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->aw_cnt = atomic_read(&sbi->aw_cnt); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6abf26c31d01..ef68433a2363 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1101,6 +1101,8 @@ struct f2fs_sb_info { block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ + unsigned int nquota_files; /* # of quota sysfile */ + u32 s_next_generation; /* for NFS support */ /* # of pages, see count_type */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d3322752426f..2ce0321ab9c0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2661,7 +2661,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) /* not used nids: 0, node, meta, (and root counted as valid node) */ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - - F2FS_RESERVED_NODE_NUM; + sbi->nquota_files - F2FS_RESERVED_NODE_NUM; nm_i->nid_cnt[FREE_NID] = 0; nm_i->nid_cnt[PREALLOC_NID] = 0; nm_i->nat_cnt = 0; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 708155d9c2e4..7ef7620b2010 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1008,7 +1008,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; - avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + avail_node_count = sbi->total_node_count - sbi->nquota_files - + F2FS_RESERVED_NODE_NUM; if (avail_node_count > user_block_count) { buf->f_files = user_block_count; @@ -2462,6 +2463,13 @@ try_onemore: else sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; + + if (f2fs_sb_has_quota_ino(sbi->sb)) { + for (i = 0; i < MAXQUOTAS; i++) { + if (f2fs_qf_ino(sbi->sb, i)) + sbi->nquota_files++; + } + } #endif sb->s_op = &f2fs_sops; From 66e833613715721f347757ad32f19cb4e26e7363 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Fri, 17 Nov 2017 16:13:38 +0800 Subject: [PATCH 02/72] f2fs: no need to read nat block if nat_block_bitmap is set No need to read nat block if nat_block_bitmap is set. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2ce0321ab9c0..93bd89a9a56f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1930,9 +1930,6 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; - if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) - return; - __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -2037,10 +2034,13 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) down_read(&nm_i->nat_tree_lock); while (1) { - struct page *page = get_current_nat_page(sbi, nid); + if (!test_bit_le(NAT_BLOCK_OFFSET(nid), + nm_i->nat_block_bitmap)) { + struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + } nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); if (unlikely(nid >= nm_i->max_nid)) From 2ab56a59ca880eebb3b89704ca1952f388151d29 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 21 Nov 2017 17:49:54 +0800 Subject: [PATCH 03/72] f2fs: remove unneeded memory footprint accounting We forgot to remov memory footprint accounting of per-cpu type variables, fix it. Fixes: 35782b233f37 ("f2fs: remove percpu_count due to performance regression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 4d929627e210..674f9bbe98d9 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -179,7 +179,6 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; si->base_mem += 2 * sizeof(struct f2fs_inode_info); si->base_mem += sizeof(*sbi->ckpt); - si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; /* build sm */ si->base_mem += sizeof(struct f2fs_sm_info); From 5921aaa185908c07e1e30cbdaee417ddeb27cbf9 Mon Sep 17 00:00:00 2001 From: LiFan Date: Wed, 22 Nov 2017 16:07:23 +0800 Subject: [PATCH 04/72] f2fs: fix concurrent problem for updating free bitmap alloc_nid_failed and scan_nat_page can be called at the same time, and we haven't protected add_free_nid and update_free_nid_bitmap with the same nid_list_lock. That could lead to Thread A Thread B - __build_free_nids - scan_nat_page - add_free_nid - alloc_nid_failed - update_free_nid_bitmap - update_free_nid_bitmap scan_nat_page will clear the free bitmap since the nid is PREALLOC_NID, but alloc_nid_failed needs to set the free bitmap. This results in free nid with free bitmap cleared. This patch update the bitmap under the same nid_list_lock in add_free_nid. And use __GFP_NOFAIL to make sure to update status of free nid correctly. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 85 +++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 93bd89a9a56f..41779927bd82 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1812,8 +1812,33 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, } } +static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, + bool set, bool build) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); + unsigned int nid_ofs = nid - START_NID(nid); + + if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + + if (set) { + if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; + __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + nm_i->free_nid_count[nat_ofs]++; + } else { + if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + if (!build) + nm_i->free_nid_count[nat_ofs]--; + } +} + /* return if the nid is recognized as free */ -static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) +static bool add_free_nid(struct f2fs_sb_info *sbi, + nid_t nid, bool build, bool update) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *e; @@ -1829,8 +1854,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i->nid = nid; i->state = FREE_NID; - if (radix_tree_preload(GFP_NOFS)) - goto err; + radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); spin_lock(&nm_i->nid_list_lock); @@ -1871,9 +1895,14 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) ret = true; err = __insert_free_nid(sbi, i, FREE_NID); err_out: + if (update) { + update_free_nid_bitmap(sbi, nid, ret, build); + if (!build) + nm_i->available_nids++; + } spin_unlock(&nm_i->nid_list_lock); radix_tree_preload_end(); -err: + if (err) kmem_cache_free(free_nid_slab, i); return ret; @@ -1897,30 +1926,6 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set, bool build) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); - unsigned int nid_ofs = nid - START_NID(nid); - - if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) - return; - - if (set) { - if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) - return; - __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - nm_i->free_nid_count[nat_ofs]++; - } else { - if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) - return; - __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - if (!build) - nm_i->free_nid_count[nat_ofs]--; - } -} - static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { @@ -1935,18 +1940,18 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { - bool freed = false; - if (unlikely(start_nid >= nm_i->max_nid)) break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(sbi, blk_addr == NEW_ADDR); - if (blk_addr == NULL_ADDR) - freed = add_free_nid(sbi, start_nid, true); - spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, start_nid, freed, true); - spin_unlock(&NM_I(sbi)->nid_list_lock); + if (blk_addr == NULL_ADDR) { + add_free_nid(sbi, start_nid, true, true); + } else { + spin_lock(&NM_I(sbi)->nid_list_lock); + update_free_nid_bitmap(sbi, start_nid, false, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); + } } } @@ -1964,7 +1969,7 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi) addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); nid = le32_to_cpu(nid_in_journal(journal, i)); if (addr == NULL_ADDR) - add_free_nid(sbi, nid, true); + add_free_nid(sbi, nid, true, false); else remove_free_nid(sbi, nid); } @@ -1991,7 +1996,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) break; nid = i * NAT_ENTRY_PER_BLOCK + idx; - add_free_nid(sbi, nid, true); + add_free_nid(sbi, nid, true, false); if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS) goto out; @@ -2497,11 +2502,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), set, ne); if (nat_get_blkaddr(ne) == NULL_ADDR) { - add_free_nid(sbi, nid, false); - spin_lock(&NM_I(sbi)->nid_list_lock); - NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); - spin_unlock(&NM_I(sbi)->nid_list_lock); + add_free_nid(sbi, nid, false, true); } else { spin_lock(&NM_I(sbi)->nid_list_lock); update_free_nid_bitmap(sbi, nid, false, false); From f6df8f234e2502b7d8c6de42e066e01f908318cc Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 22 Nov 2017 18:23:38 +0800 Subject: [PATCH 05/72] f2fs: introduce sysfs readdir_ra to readahead inode block in readdir This patch introduces a sysfs interface readdir_ra to enable/disable readaheading inode block in f2fs_readdir. When readdir_ra is enabled, it improves the performance of "readdir + stat". For 300,000 files: time find /data/test > /dev/null disable readdir_ra: 1m25.69s real 0m01.94s user 0m50.80s system enable readdir_ra: 0m18.55s real 0m00.44s user 0m15.39s system Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ fs/f2fs/dir.c | 4 ++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/sysfs.c | 2 ++ 4 files changed, 13 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index a7799c2fca28..d870b5514d15 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -186,3 +186,9 @@ Date: August 2017 Contact: "Jaegeuk Kim" Description: Controls sleep time of GC urgent mode + +What: /sys/fs/f2fs//readdir_ra +Date: November 2017 +Contact: "Sheng Yong" +Description: + Controls readahead inode block in readdir. diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2d98d877c09d..724304dc6143 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -798,6 +798,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int bit_pos; struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); + struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); bit_pos = ((unsigned long)ctx->pos % d->max); @@ -836,6 +837,9 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, le32_to_cpu(de->ino), d_type)) return 1; + if (sbi->readdir_ra == 1) + ra_node_page(sbi, le32_to_cpu(de->ino)); + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ef68433a2363..7cb79fbebb63 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1093,6 +1093,7 @@ struct f2fs_sb_info { int dir_level; /* directory level */ int inline_xattr_size; /* inline xattr size */ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ + int readdir_ra; /* readahead inode in readdir */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 9835348b6e5d..93c3364250dd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -299,6 +299,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -346,6 +347,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), ATTR_LIST(iostat_enable), + ATTR_LIST(readdir_ra), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), From 25006645d2e79422b6b3f26e5a82b6cbb5d49a0e Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 22 Nov 2017 18:23:39 +0800 Subject: [PATCH 06/72] f2fs: still write data if preallocate only partial blocks If there is not enough space left, f2fs_preallocate_blocks may only preallocte partial blocks. As a result, the write operation fails but i_blocks is not 0. To avoid this, f2fs should write data in non-preallocation way and write as many data as the size of i_blocks. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 516fa0d3ff9c..3fa20a932dd7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -862,8 +862,14 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (err) return err; } - if (!f2fs_has_inline_data(inode)) - return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (!f2fs_has_inline_data(inode)) { + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (map.m_len > 0 && err == -ENOSPC) { + set_inode_flag(inode, FI_NO_PREALLOC); + err = 0; + } + return err; + } return err; } From e17d488bce9ef4fe3b18d1548ea79b65f836b497 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 22 Nov 2017 18:23:40 +0800 Subject: [PATCH 07/72] f2fs: remove unused parameter Commit d260081ccf37 ("f2fs: change recovery policy of xattr node block") removes the use of blkaddr, which is no longer used. So remove the parameter. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/node.c | 2 +- fs/f2fs/recovery.c | 6 +++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7cb79fbebb63..0b14df0bee27 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2607,8 +2607,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); void recover_inline_xattr(struct inode *inode, struct page *page); -int recover_xattr_data(struct inode *inode, struct page *page, - block_t blkaddr); +int recover_xattr_data(struct inode *inode, struct page *page); int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 41779927bd82..95d11d250e83 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2224,7 +2224,7 @@ update_inode: f2fs_put_page(ipage, 1); } -int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +int recover_xattr_data(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b3a14b0429f2..cbeef73bc4dd 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -404,7 +404,7 @@ truncate_out: } static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, block_t blkaddr) + struct page *page) { struct dnode_of_data dn; struct node_info ni; @@ -415,7 +415,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (IS_INODE(page)) { recover_inline_xattr(inode, page); } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - err = recover_xattr_data(inode, page, blkaddr); + err = recover_xattr_data(inode, page); if (!err) recovered++; goto out; @@ -568,7 +568,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, break; } } - err = do_recover_data(sbi, entry->inode, page, blkaddr); + err = do_recover_data(sbi, entry->inode, page); if (err) { f2fs_put_page(page, 1); break; From 21020812c9e1ab593367fad9ce579f842a0b406d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Nov 2017 23:26:52 +0800 Subject: [PATCH 08/72] f2fs: fix lock dependency in between dio_rwsem & i_mmap_sem test/generic/208 reports a potential deadlock as below: Chain exists of: &mm->mmap_sem --> &fi->i_mmap_sem --> &fi->dio_rwsem[WRITE] Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fi->dio_rwsem[WRITE]); lock(&fi->i_mmap_sem); lock(&fi->dio_rwsem[WRITE]); lock(&mm->mmap_sem); This patch changes the lock dependency as below in fallocate() to fix this issue: - dio_rwsem - i_mmap_sem Fixes: bb06664a534b ("f2fs: avoid race in between GC and block exchange") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7874bbd7311d..84a011a522a1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1186,14 +1186,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); /* write out all dirty pages from offset */ ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); if (ret) - goto out; - - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + goto out_unlock; truncate_pagecache(inode, offset); @@ -1212,9 +1212,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); out_unlock: - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); -out: up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } @@ -1385,6 +1384,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + down_write(&F2FS_I(inode)->i_mmap_sem); ret = truncate_blocks(inode, i_size_read(inode), true); if (ret) @@ -1395,9 +1397,6 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (ret) goto out; - /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); - truncate_pagecache(inode, offset); pg_start = offset >> PAGE_SHIFT; @@ -1425,10 +1424,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); - - up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); out: up_write(&F2FS_I(inode)->i_mmap_sem); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); return ret; } From 736c0a74856d762b09a997d28e3ff6d8bdcf942c Mon Sep 17 00:00:00 2001 From: LiFan Date: Sat, 25 Nov 2017 11:46:18 +0800 Subject: [PATCH 09/72] f2fs: remove an excess variable Remove the variable page_idx which no one would miss. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3fa20a932dd7..457a60ffd3ee 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1204,7 +1204,6 @@ static int f2fs_mpage_readpages(struct address_space *mapping, unsigned nr_pages) { struct bio *bio = NULL; - unsigned page_idx; sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -1221,8 +1220,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_flags = 0; map.m_next_pgofs = NULL; - for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { - + for (; nr_pages; nr_pages--) { if (pages) { page = list_last_entry(pages, struct page, lru); From de8b10ac136135e2490b951f1bbdd28417575822 Mon Sep 17 00:00:00 2001 From: Zhikang Zhang Date: Sun, 26 Nov 2017 02:34:28 +0800 Subject: [PATCH 10/72] f2fs: remove repeated f2fs_bug_on f2fs: remove repeated f2fs_bug_on which has already existed in function invalidate_blocks. Signed-off-by: Zhikang Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 95d11d250e83..fe99723bb5b6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -702,7 +702,6 @@ static void truncate_node(struct dnode_of_data *dn) struct node_info ni; get_node_info(sbi, dn->nid, &ni); - f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); @@ -2238,7 +2237,6 @@ int recover_xattr_data(struct inode *inode, struct page *page) /* 1: invalidate the previous xattr nid */ get_node_info(sbi, prev_xnid, &ni); - f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR); invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); From 66717260545b67b04ce6b004fff26de7141b2757 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Wed, 29 Nov 2017 13:19:31 +0200 Subject: [PATCH 11/72] posix_acl: convert posix_acl.a_refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable posix_acl.a_refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. **Important note for maintainers: Some functions from refcount_t API defined in lib/refcount.c have different memory ordering guarantees than their atomic counterparts. The full comparison can be seen in https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon in state to be merged to the documentation tree. Normally the differences should not matter since refcount_t provides enough guarantees to satisfy the refcounting use cases, but in some rare cases it might matter. Please double check that you don't have some undocumented memory guarantees for this variable usage. For the posix_acl.a_refcount it might make a difference in following places: - get_cached_acl(): increment in refcount_inc_not_zero() only guarantees control dependency on success vs. fully ordered atomic counterpart. However this operation is performed under rcu_read_lock(), so this should be fine. - posix_acl_release(): decrement in refcount_dec_and_test() only provides RELEASE ordering and control dependency on success vs. fully ordered atomic counterpart Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 +- fs/posix_acl.c | 6 +++--- include/linux/posix_acl.h | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 2bb7c9fc5144..111824199a88 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -270,7 +270,7 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, sizeof(struct posix_acl_entry); clone = kmemdup(acl, size, flags); if (clone) - atomic_set(&clone->a_refcount, 1); + refcount_set(&clone->a_refcount, 1); } return clone; } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index eebf5f6cf6d5..2fd0fde16fe1 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -43,7 +43,7 @@ struct posix_acl *get_cached_acl(struct inode *inode, int type) rcu_read_lock(); acl = rcu_dereference(*p); if (!acl || is_uncached_acl(acl) || - atomic_inc_not_zero(&acl->a_refcount)) + refcount_inc_not_zero(&acl->a_refcount)) break; rcu_read_unlock(); cpu_relax(); @@ -164,7 +164,7 @@ EXPORT_SYMBOL(get_acl); void posix_acl_init(struct posix_acl *acl, int count) { - atomic_set(&acl->a_refcount, 1); + refcount_set(&acl->a_refcount, 1); acl->a_count = count; } EXPORT_SYMBOL(posix_acl_init); @@ -197,7 +197,7 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags) sizeof(struct posix_acl_entry); clone = kmemdup(acl, size, flags); if (clone) - atomic_set(&clone->a_refcount, 1); + refcount_set(&clone->a_refcount, 1); } return clone; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index b2b7255ec7f5..540595a321a7 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -12,6 +12,7 @@ #include #include #include +#include #include struct posix_acl_entry { @@ -24,7 +25,7 @@ struct posix_acl_entry { }; struct posix_acl { - atomic_t a_refcount; + refcount_t a_refcount; struct rcu_head a_rcu; unsigned int a_count; struct posix_acl_entry a_entries[0]; @@ -41,7 +42,7 @@ static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) { if (acl) - atomic_inc(&acl->a_refcount); + refcount_inc(&acl->a_refcount); return acl; } @@ -51,7 +52,7 @@ posix_acl_dup(struct posix_acl *acl) static inline void posix_acl_release(struct posix_acl *acl) { - if (acl && atomic_dec_and_test(&acl->a_refcount)) + if (acl && refcount_dec_and_test(&acl->a_refcount)) kfree_rcu(acl, a_rcu); } From 2e168c82dc32e02c0d4774cbae4f07d98ff51649 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:35:28 -0800 Subject: [PATCH 12/72] f2fs: switch to fscrypt_file_open() Reviewed-by: Chao Yu Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 84a011a522a1..ebe55d4793fb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -472,22 +472,10 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int f2fs_file_open(struct inode *inode, struct file *filp) { - struct dentry *dir; + int err = fscrypt_file_open(inode, filp); - if (f2fs_encrypted_inode(inode)) { - int ret = fscrypt_get_encryption_info(inode); - if (ret) - return -EACCES; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - dir = dget_parent(file_dentry(filp)); - if (f2fs_encrypted_inode(d_inode(dir)) && - !fscrypt_has_permitted_context(d_inode(dir), inode)) { - dput(dir); - return -EPERM; - } - dput(dir); + if (err) + return err; return dquot_file_open(inode, filp); } From b05157e772589a5ef315ead84c95c43a7844a2b6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:35:29 -0800 Subject: [PATCH 13/72] f2fs: switch to fscrypt_prepare_link() Reviewed-by: Chao Yu Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 28bdf8828e73..dcf5c3a97059 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -240,9 +240,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if (f2fs_encrypted_inode(dir) && - !fscrypt_has_permitted_context(dir, inode)) - return -EPERM; + err = fscrypt_prepare_link(old_dentry, dir, dentry); + if (err) + return err; if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(dir)->i_projid, From 2e45b07fda2560eaafe80eab7a2ad9da763f22a1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:35:30 -0800 Subject: [PATCH 14/72] f2fs: switch to fscrypt_prepare_rename() Reviewed-by: Chao Yu Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index dcf5c3a97059..e910d2ebe0c6 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -800,18 +800,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if ((f2fs_encrypted_inode(old_dir) && - !fscrypt_has_encryption_key(old_dir)) || - (f2fs_encrypted_inode(new_dir) && - !fscrypt_has_encryption_key(new_dir))) - return -ENOKEY; - - if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) && - !fscrypt_has_permitted_context(new_dir, old_inode)) { - err = -EPERM; - goto out; - } - if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, F2FS_I(old_dentry->d_inode)->i_projid))) @@ -1002,18 +990,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; - if ((f2fs_encrypted_inode(old_dir) && - !fscrypt_has_encryption_key(old_dir)) || - (f2fs_encrypted_inode(new_dir) && - !fscrypt_has_encryption_key(new_dir))) - return -ENOKEY; - - if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) && - (old_dir != new_dir) && - (!fscrypt_has_permitted_context(new_dir, old_inode) || - !fscrypt_has_permitted_context(old_dir, new_inode))) - return -EPERM; - if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, F2FS_I(old_dentry->d_inode)->i_projid)) || @@ -1153,9 +1129,16 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { + int err; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; + err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, + flags); + if (err) + return err; + if (flags & RENAME_EXCHANGE) { return f2fs_cross_rename(old_dir, old_dentry, new_dir, new_dentry); From 55899d7b4977759a751cf923d368e22b8c891544 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:35:31 -0800 Subject: [PATCH 15/72] f2fs: switch to fscrypt_prepare_lookup() Reviewed-by: Chao Yu Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e910d2ebe0c6..bbb3fc1e2bef 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -357,20 +357,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, trace_f2fs_lookup_start(dir, dentry, flags); - if (f2fs_encrypted_inode(dir)) { - err = fscrypt_get_encryption_info(dir); - - /* - * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is - * created while the directory was encrypted and we - * don't have access to the key. - */ - if (fscrypt_has_encryption_key(dir)) - fscrypt_set_encrypted_dentry(dentry); - fscrypt_set_d_op(dentry); - if (err && err != -ENOKEY) - goto out; - } + err = fscrypt_prepare_lookup(dir, dentry, flags); + if (err) + goto out; if (dentry->d_name.len > F2FS_NAME_LEN) { err = -ENAMETOOLONG; From 20bb2479be49296ca324d7d9dfafad4816e7958c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 29 Nov 2017 12:35:32 -0800 Subject: [PATCH 16/72] f2fs: switch to fscrypt_prepare_setattr() Reviewed-by: Chao Yu Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ebe55d4793fb..d2bd2138514c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -743,6 +743,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (err) return err; + err = fscrypt_prepare_setattr(dentry, attr); + if (err) + return err; + if (is_quota_modification(inode, attr)) { err = dquot_initialize(inode); if (err) @@ -758,14 +762,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (f2fs_encrypted_inode(inode)) { - err = fscrypt_get_encryption_info(inode); - if (err) - return err; - if (!fscrypt_has_encryption_key(inode)) - return -ENOKEY; - } - if (attr->ia_size <= i_size_read(inode)) { down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); From d5097be55c21c103d2227591708425aab2e9682d Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Tue, 28 Nov 2017 09:23:00 +0900 Subject: [PATCH 17/72] f2fs: apply write hints to select the type of segment for direct write When blocks are allocated for direct write, select the type of segment using the kiocb hint. But if an inode has FI_NO_ALLOC, use the inode hint. Signed-off-by: Hyunchul Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 26 ++++++++++++++++++-------- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 6 ++++-- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 457a60ffd3ee..ce1a4e94e551 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -783,7 +783,7 @@ got_it: return page; } -static int __allocate_data_block(struct dnode_of_data *dn) +static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; @@ -808,7 +808,7 @@ alloc: set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, - &sum, CURSEG_WARM_DATA, NULL, false); + &sum, seg_type, NULL, false); set_data_blkaddr(dn); /* update i_size */ @@ -851,12 +851,15 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_len = 0; map.m_next_pgofs = NULL; + map.m_seg_type = NO_CHECK_TYPE; - if (iocb->ki_flags & IOCB_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) { + map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); return f2fs_map_blocks(inode, &map, 1, __force_buffered_io(inode, WRITE) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO); + } if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { err = f2fs_convert_inline_inode(inode); if (err) @@ -966,7 +969,8 @@ next_block: last_ofs_in_node = dn.ofs_in_node; } } else { - err = __allocate_data_block(&dn); + err = __allocate_data_block(&dn, + map->m_seg_type); if (!err) set_inode_flag(inode, FI_APPEND_WRITE); } @@ -1059,7 +1063,7 @@ out: static int __get_data_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create, int flag, - pgoff_t *next_pgofs) + pgoff_t *next_pgofs, int seg_type) { struct f2fs_map_blocks map; int err; @@ -1067,6 +1071,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map.m_lblk = iblock; map.m_len = bh->b_size >> inode->i_blkbits; map.m_next_pgofs = next_pgofs; + map.m_seg_type = seg_type; err = f2fs_map_blocks(inode, &map, create, flag); if (!err) { @@ -1082,14 +1087,17 @@ static int get_data_block(struct inode *inode, sector_t iblock, pgoff_t *next_pgofs) { return __get_data_block(inode, iblock, bh_result, create, - flag, next_pgofs); + flag, next_pgofs, + NO_CHECK_TYPE); } static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DEFAULT, NULL); + F2FS_GET_BLOCK_DEFAULT, NULL, + rw_hint_to_seg_type( + inode->i_write_hint)); } static int get_data_block_bmap(struct inode *inode, sector_t iblock, @@ -1100,7 +1108,8 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock, return -EFBIG; return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_BMAP, NULL); + F2FS_GET_BLOCK_BMAP, NULL, + NO_CHECK_TYPE); } static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -1219,6 +1228,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_len = 0; map.m_flags = 0; map.m_next_pgofs = NULL; + map.m_seg_type = NO_CHECK_TYPE; for (; nr_pages; nr_pages--) { if (pages) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0b14df0bee27..8bbb1628e61f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -542,6 +542,7 @@ struct f2fs_map_blocks { unsigned int m_len; unsigned int m_flags; pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ + int m_seg_type; }; /* for flag in get_data_block */ @@ -2674,6 +2675,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi); void destroy_segment_manager(struct f2fs_sb_info *sbi); int __init create_segment_manager_caches(void); void destroy_segment_manager_caches(void); +int rw_hint_to_seg_type(enum rw_hint hint); /* * checkpoint.c diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d2bd2138514c..6db8a46b2b73 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1418,7 +1418,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t len, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; + struct f2fs_map_blocks map = { .m_next_pgofs = NULL, + .m_seg_type = NO_CHECK_TYPE }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -2066,7 +2067,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_defragment *range) { struct inode *inode = file_inode(filp); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; + struct f2fs_map_blocks map = { .m_next_pgofs = NULL, + .m_seg_type = NO_CHECK_TYPE }; struct extent_info ei = {0,0,0}; pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; From 979f492fe387e88a0c97566853ebb615f19a3221 Mon Sep 17 00:00:00 2001 From: LiFan Date: Tue, 28 Nov 2017 20:17:41 +0800 Subject: [PATCH 18/72] f2fs: remove a redundant conditional expression Avoid checking is_inode repeatedly, and make the logic a little bit clearer. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8bbb1628e61f..f3b46197b282 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1996,11 +1996,11 @@ static inline block_t datablock_addr(struct inode *inode, raw_node = F2FS_NODE(node_page); /* from GC path only */ - if (!inode) { - if (is_inode) + if (is_inode) { + if (!inode) base = offset_in_addr(&raw_node->i); - } else if (f2fs_has_extra_attr(inode) && is_inode) { - base = get_extra_isize(inode); + else if (f2fs_has_extra_attr(inode)) + base = get_extra_isize(inode); } addr_array = blkaddr_in_node(raw_node); From acbf054d537d7efddc232e6c11a15d4be0507fd6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:17 +0800 Subject: [PATCH 19/72] f2fs: inject fault to kzalloc This patch introduces f2fs_kzalloc based on f2fs_kmalloc in order to support error injection for kzalloc(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/namei.c | 2 +- fs/f2fs/node.c | 7 ++++--- fs/f2fs/segment.c | 30 ++++++++++++++++-------------- fs/f2fs/xattr.c | 8 ++++---- 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4aa69bc1c70a..097eadc82432 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -793,7 +793,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) block_t cp_blk_no; int i; - sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); + sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 674f9bbe98d9..a66107b5cfff 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -439,7 +439,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; - si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); if (!si) return -ENOMEM; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f3b46197b282..3f3d062c349e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2422,6 +2422,12 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, return kmalloc(size, flags); } +static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + return f2fs_kmalloc(sbi, size, flags | __GFP_ZERO); +} + static inline int get_extra_isize(struct inode *inode) { return F2FS_I(inode)->i_extra_isize / sizeof(__le32); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index bbb3fc1e2bef..e3a0f4f65851 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -533,7 +533,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct qstr istr = QSTR_INIT(symname, len); struct fscrypt_str ostr; - sd = kzalloc(disk_link.len, GFP_NOFS); + sd = f2fs_kzalloc(sbi, disk_link.len, GFP_NOFS); if (!sd) { err = -ENOMEM; goto err_out; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index fe99723bb5b6..f3ab3a917d13 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2581,8 +2581,8 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->nat_bits_blocks = F2FS_BYTES_TO_BLK((nat_bits_bytes << 1) + 8 + F2FS_BLKSIZE - 1); - nm_i->nat_bits = kzalloc(nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, - GFP_KERNEL); + nm_i->nat_bits = f2fs_kzalloc(sbi, + nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -2728,7 +2728,8 @@ int build_node_manager(struct f2fs_sb_info *sbi) { int err; - sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL); + sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info), + GFP_KERNEL); if (!sbi->nm_info) return -ENOMEM; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c117e0913f2a..51307f9d2cb0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -657,7 +657,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) goto init_thread; } - fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; atomic_set(&fcc->issued_flush, 0); @@ -1737,7 +1737,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) goto init_thread; } - dcc = kzalloc(sizeof(struct discard_cmd_control), GFP_KERNEL); + dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL); if (!dcc) return -ENOMEM; @@ -3338,7 +3338,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) unsigned int bitmap_size; /* allocate memory for SIT information */ - sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL); + sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL); if (!sit_i) return -ENOMEM; @@ -3356,29 +3356,30 @@ static int build_sit_info(struct f2fs_sb_info *sbi) for (start = 0; start < MAIN_SEGS(sbi); start++) { sit_i->sentries[start].cur_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map || !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; #ifdef CONFIG_F2FS_CHECK_FS sit_i->sentries[start].cur_valid_map_mir - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map_mir) return -ENOMEM; #endif if (f2fs_discard_en(sbi)) { sit_i->sentries[start].discard_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, + GFP_KERNEL); if (!sit_i->sentries[start].discard_map) return -ENOMEM; } } - sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->tmp_map) return -ENOMEM; @@ -3427,7 +3428,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) unsigned int bitmap_size, sec_bitmap_size; /* allocate memory for free segmap information */ - free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL); + free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL); if (!free_i) return -ENOMEM; @@ -3468,12 +3469,12 @@ static int build_curseg(struct f2fs_sb_info *sbi) for (i = 0; i < NR_CURSEG_TYPE; i++) { mutex_init(&array[i].curseg_mutex); - array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL); + array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL); if (!array[i].sum_blk) return -ENOMEM; init_rwsem(&array[i].journal_rwsem); - array[i].journal = kzalloc(sizeof(struct f2fs_journal), - GFP_KERNEL); + array[i].journal = f2fs_kzalloc(sbi, + sizeof(struct f2fs_journal), GFP_KERNEL); if (!array[i].journal) return -ENOMEM; array[i].segno = NULL_SEGNO; @@ -3631,7 +3632,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) unsigned int bitmap_size, i; /* allocate memory for dirty segments list information */ - dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL); + dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info), + GFP_KERNEL); if (!dirty_i) return -ENOMEM; @@ -3685,7 +3687,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) struct f2fs_sm_info *sm_info; int err; - sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL); + sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL); if (!sm_info) return -ENOMEM; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ec8961ef8cac..5e2b905cc0c4 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -298,8 +298,8 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!size && !inline_size) return -ENODATA; - txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, - GFP_F2FS_ZERO); + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), + inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS); if (!txattr_addr) return -ENOMEM; @@ -351,8 +351,8 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, void *txattr_addr; int err; - txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, - GFP_F2FS_ZERO); + txattr_addr = f2fs_kzalloc(F2FS_I_SB(inode), + inline_size + size + XATTR_PADDING_SIZE, GFP_NOFS); if (!txattr_addr) return -ENOMEM; From 628b3d1438fbcb1f9e79472a0872a2c3018a48be Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:18 +0800 Subject: [PATCH 20/72] f2fs: inject fault to kvmalloc This patch supports to inject fault into kvmalloc/kvzalloc. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 19 +++++++++++++++++++ fs/f2fs/file.c | 6 ++++-- fs/f2fs/node.c | 6 +++--- fs/f2fs/segment.c | 16 +++++++++------- fs/f2fs/super.c | 1 + 5 files changed, 36 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f3d062c349e..9b967eba3e69 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -43,6 +43,7 @@ #ifdef CONFIG_F2FS_FAULT_INJECTION enum { FAULT_KMALLOC, + FAULT_KVMALLOC, FAULT_PAGE_ALLOC, FAULT_PAGE_GET, FAULT_ALLOC_BIO, @@ -2428,6 +2429,24 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi, return f2fs_kmalloc(sbi, size, flags | __GFP_ZERO); } +static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_KVMALLOC)) { + f2fs_show_injection_info(FAULT_KVMALLOC); + return NULL; + } +#endif + return kvmalloc(size, flags); +} + +static inline void *f2fs_kvzalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) +{ + return f2fs_kvmalloc(sbi, size, flags | __GFP_ZERO); +} + static inline int get_extra_isize(struct inode *inode) { return F2FS_I(inode)->i_extra_isize / sizeof(__le32); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6db8a46b2b73..a85c9aafd7a1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1098,11 +1098,13 @@ static int __exchange_data_block(struct inode *src_inode, while (len) { olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); - src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), + sizeof(block_t) * olen, GFP_KERNEL); if (!src_blkaddr) return -ENOMEM; - do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL); + do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), + sizeof(int) * olen, GFP_KERNEL); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f3ab3a917d13..a9c340e43af9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2707,17 +2707,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); - nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks * + nm_i->free_nid_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks * NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL); if (!nm_i->free_nid_bitmap) return -ENOMEM; - nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8, + nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8, GFP_KERNEL); if (!nm_i->nat_block_bitmap) return -ENOMEM; - nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks * + nm_i->free_nid_count = f2fs_kvzalloc(sbi, nm_i->nat_blocks * sizeof(unsigned short), GFP_KERNEL); if (!nm_i->free_nid_count) return -ENOMEM; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 51307f9d2cb0..56d9fecf36a4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3344,13 +3344,14 @@ static int build_sit_info(struct f2fs_sb_info *sbi) SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) * + sit_i->sentries = f2fs_kvzalloc(sbi, MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL); + sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size, + GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; @@ -3384,7 +3385,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) * + sit_i->sec_entries = f2fs_kvzalloc(sbi, MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; @@ -3435,12 +3436,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) SM_I(sbi)->free_info = free_i; bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); - free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL); + free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL); + free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -3620,7 +3621,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -3643,7 +3644,8 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size, + GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7ef7620b2010..20935d10d8f1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -43,6 +43,7 @@ static struct kmem_cache *f2fs_inode_cachep; char *fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", + [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", [FAULT_PAGE_GET] = "page get", [FAULT_ALLOC_BIO] = "alloc bio", From 4e6aad29bc8d5d56f053c9bb2fe7a5178ea6f3dc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:19 +0800 Subject: [PATCH 21/72] f2fs: spread f2fs_k{m,z}alloc Use f2fs_k{m,z}alloc as much as possible to increase fault injection points. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 56d9fecf36a4..2d38c7f50e0b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3462,7 +3462,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) struct curseg_info *array; int i; - array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL); + array = f2fs_kzalloc(sbi, sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); if (!array) return -ENOMEM; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 20935d10d8f1..2fabb131d385 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2150,14 +2150,15 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) if (nr_sectors & (bdev_zone_sectors(bdev) - 1)) FDEV(devi).nr_blkz++; - FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL); + FDEV(devi).blkz_type = f2fs_kmalloc(sbi, FDEV(devi).nr_blkz, + GFP_KERNEL); if (!FDEV(devi).blkz_type) return -ENOMEM; #define F2FS_REPORT_NR_ZONES 4096 - zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone), - GFP_KERNEL); + zones = f2fs_kzalloc(sbi, sizeof(struct blk_zone) * + F2FS_REPORT_NR_ZONES, GFP_KERNEL); if (!zones) return -ENOMEM; @@ -2297,8 +2298,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) * Initialize multiple devices information, or single * zoned block device information. */ - sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info), - GFP_KERNEL); + sbi->devs = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_info) * + max_devices, GFP_KERNEL); if (!sbi->devs) return -ENOMEM; @@ -2504,8 +2505,9 @@ try_onemore: int n = (i == META) ? 1: NR_TEMP_TYPE; int j; - sbi->write_io[i] = kmalloc(n * sizeof(struct f2fs_bio_info), - GFP_KERNEL); + sbi->write_io[i] = f2fs_kmalloc(sbi, + n * sizeof(struct f2fs_bio_info), + GFP_KERNEL); if (!sbi->write_io[i]) { err = -ENOMEM; goto free_options; From bae01eda8e6ef3ba66ee068d91fd4dd31a649c6f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:20 +0800 Subject: [PATCH 22/72] f2fs: fix error handling in fill_super In fill_super, if we fail to call f2fs_build_stats(), it needs to detach from global f2fs shrink list, otherwise once system starts to shrink slab cache, we will encounter below panic: BUG: unable to handle kernel paging request at 00007d35 Oops: 0002 [#1] PREEMPT SMP EIP: __lock_acquire+0x70/0x12c0 Call Trace: lock_acquire+0xae/0x220 mutex_trylock+0xc5/0xf0 f2fs_shrink_count+0x32/0xb0 [f2fs] shrink_slab+0xf1/0x5b0 drop_slab_node+0x35/0x60 drop_slab+0xf/0x20 drop_caches_sysctl_handler+0x79/0xc0 proc_sys_call_handler+0xa4/0xc0 proc_sys_write+0x1f/0x30 __vfs_write+0x24/0x150 SyS_write+0x44/0x90 do_fast_syscall_32+0xa1/0x1ca entry_SYSENTER_32+0x4c/0x7b In addition, this patch relocates f2fs_join_shrinker in fill_super to avoid unneeded error handling of it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2fabb131d385..f56230759b38 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2615,18 +2615,16 @@ try_onemore: goto free_nm; } - f2fs_join_shrinker(sbi); - err = f2fs_build_stats(sbi); if (err) - goto free_nm; + goto free_node_inode; /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); err = PTR_ERR(root); - goto free_node_inode; + goto free_stats; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { iput(root); @@ -2722,6 +2720,8 @@ skip_recovery: sbi->valid_super_block ? 1 : 2, err); } + f2fs_join_shrinker(sbi); + f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); @@ -2748,14 +2748,12 @@ free_sysfs: free_root_inode: dput(sb->s_root); sb->s_root = NULL; -free_node_inode: - truncate_inode_pages_final(NODE_MAPPING(sbi)); - mutex_lock(&sbi->umount_mutex); - release_ino_entry(sbi, true); - f2fs_leave_shrinker(sbi); - iput(sbi->node_inode); - mutex_unlock(&sbi->umount_mutex); +free_stats: f2fs_destroy_stats(sbi); +free_node_inode: + release_ino_entry(sbi, true); + truncate_inode_pages_final(NODE_MAPPING(sbi)); + iput(sbi->node_inode); free_nm: destroy_node_manager(sbi); free_sm: From 416d2dbb4ebc0be6899c0155828ce03c9a01a023 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:21 +0800 Subject: [PATCH 23/72] f2fs: clean up hash codes f2fs_chksum and f2fs_crc32 use the same 'crc32' crypto engine, also their implementation are almost the same, except with different shash description context. Introduce __f2fs_crc32 to wrap the common codes, and reuse it in f2fs_chksum and f2fs_crc32. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9b967eba3e69..55fb18e6e644 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1255,33 +1255,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi) /* * Inline functions */ -static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, - unsigned int length) -{ - SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver); - u32 *ctx = (u32 *)shash_desc_ctx(shash); - u32 retval; - int err; - - shash->tfm = sbi->s_chksum_driver; - shash->flags = 0; - *ctx = F2FS_SUPER_MAGIC; - - err = crypto_shash_update(shash, address, length); - BUG_ON(err); - - retval = *ctx; - barrier_data(ctx); - return retval; -} - -static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, - void *buf, size_t buf_size) -{ - return f2fs_crc32(sbi, buf, buf_size) == blk_crc; -} - -static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, +static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc, const void *address, unsigned int length) { struct { @@ -1302,6 +1276,24 @@ static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, return *(u32 *)desc.ctx; } +static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, + unsigned int length) +{ + return __f2fs_crc32(sbi, F2FS_SUPER_MAGIC, address, length); +} + +static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, + void *buf, size_t buf_size) +{ + return f2fs_crc32(sbi, buf, buf_size) == blk_crc; +} + +static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, + const void *address, unsigned int length) +{ + return __f2fs_crc32(sbi, crc, address, length); +} + static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) { return container_of(inode, struct f2fs_inode_info, vfs_inode); From 4c2ac6a86073a4cbcea123f8be84bd4417eae001 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:22 +0800 Subject: [PATCH 24/72] f2fs: clean up f2fs_map_blocks f2fs_map_blocks(): if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) { if (create) { ... } else { ... if (flag == F2FS_GET_BLOCK_FIEMAP && blkaddr == NULL_ADDR) { ... } if (flag != F2FS_GET_BLOCK_FIEMAP || blkaddr != NEW_ADDR) goto sync_out; } It means we can break the loop in cases of: a) flag != F2FS_GET_BLOCK_FIEMAP or b) flag == F2FS_GET_BLOCK_FIEMAP && blkaddr == NULL_ADDR Condition b) is the same as previous one, so merge operations of them for readability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ce1a4e94e551..7aca6ccd01f6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -987,9 +987,9 @@ next_block: blkaddr == NULL_ADDR) { if (map->m_next_pgofs) *map->m_next_pgofs = pgofs + 1; + goto sync_out; } - if (flag != F2FS_GET_BLOCK_FIEMAP || - blkaddr != NEW_ADDR) + if (flag != F2FS_GET_BLOCK_FIEMAP) goto sync_out; } } From f652e9d988d6b4d2a81c920374431a73b70090fd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 30 Nov 2017 19:28:23 +0800 Subject: [PATCH 25/72] f2fs: don't return value in truncate_data_blocks_range There is no caller cares about return value of truncate_data_blocks_range, remove it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 55fb18e6e644..e9e72844220c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2500,7 +2500,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); int f2fs_setattr(struct dentry *dentry, struct iattr *attr); int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); -int truncate_data_blocks_range(struct dnode_of_data *dn, int count); +void truncate_data_blocks_range(struct dnode_of_data *dn, int count); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a85c9aafd7a1..1db68f1bcd77 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -479,7 +479,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) return dquot_file_open(inode, filp); } -int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +void truncate_data_blocks_range(struct dnode_of_data *dn, int count) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_node *raw_node; @@ -522,7 +522,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) f2fs_update_time(sbi, REQ_TIME); trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid, dn->ofs_in_node, nr_free); - return nr_free; } void truncate_data_blocks(struct dnode_of_data *dn) From fab2adee36039cdc2002ba3fe3a416c3f1bc6b52 Mon Sep 17 00:00:00 2001 From: LiFan Date: Tue, 5 Dec 2017 16:38:01 +0800 Subject: [PATCH 26/72] f2fs: use unlikely for release case Since the variable release is only nonzero when another unlikely case occurs, use unlikely() on it seems logical. Signed-off-by: Fan li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e9e72844220c..930cf430dcc2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1594,7 +1594,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, } spin_unlock(&sbi->stat_lock); - if (release) + if (unlikely(release)) dquot_release_reservation_block(inode, release); f2fs_i_blocks_write(inode, *count, true, true); return 0; From c376fc0f3581c0b08099b0b7bfa448c30bc71c0c Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 6 Dec 2017 11:31:29 +0800 Subject: [PATCH 27/72] f2fs: no need return value in restore summary process No need return value in restore summary process Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 3 +-- fs/f2fs/segment.c | 14 +++----------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 930cf430dcc2..b68166e28b15 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2627,7 +2627,7 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); void recover_inline_xattr(struct inode *inode, struct page *page); int recover_xattr_data(struct inode *inode, struct page *page); int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); -int restore_node_summary(struct f2fs_sb_info *sbi, +void restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int build_node_manager(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a9c340e43af9..35b079d98c17 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2325,7 +2325,7 @@ retry: return 0; } -int restore_node_summary(struct f2fs_sb_info *sbi, +void restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum) { struct f2fs_node *rn; @@ -2358,7 +2358,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi, invalidate_mapping_pages(META_MAPPING(sbi), addr, addr + nrpages); } - return 0; } static void remove_nats_in_journal(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2d38c7f50e0b..40e1d20b8080 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2823,7 +2823,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static int read_compacted_summaries(struct f2fs_sb_info *sbi) +static void read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *seg_i; @@ -2880,7 +2880,6 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) } } f2fs_put_page(page, 1); - return 0; } static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) @@ -2926,13 +2925,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) ns->ofs_in_node = 0; } } else { - int err; - - err = restore_node_summary(sbi, segno, sum); - if (err) { - f2fs_put_page(new, 1); - return err; - } + restore_node_summary(sbi, segno, sum); } } @@ -2971,8 +2964,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) META_CP, true); /* restore for compacted data summary */ - if (read_compacted_summaries(sbi)) - return -EINVAL; + read_compacted_summaries(sbi); type = CURSEG_HOT_NODE; } From 4635b46af2b3921829de09e4be1f6b71a62e4855 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Dec 2017 14:11:40 +0800 Subject: [PATCH 28/72] f2fs: fix potential hangtask in f2fs_trace_pid As Jia-Ju Bai reported: "According to fs/f2fs/trace.c, the kernel module may sleep under a spinlock. The function call path is: f2fs_trace_pid (acquire the spinlock) f2fs_radix_tree_insert cond_resched --> may sleep I do not find a good way to fix it, so I only report. This possible bug is found by my static analysis tool (DSAC) and my code review." Obviously, it's problemetic to schedule in critical region of spinlock, which will cause uninterruptable sleep if there is no waker. This patch changes to use mutex lock intead of spinlock to avoid this condition. Reported-by: Jia-Ju Bai Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/trace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index bccbbf2616d2..a1fcd00bbb2b 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -17,7 +17,7 @@ #include "trace.h" static RADIX_TREE(pids, GFP_ATOMIC); -static spinlock_t pids_lock; +static struct mutex pids_lock; static struct last_io_info last_io; static inline void __print_last_io(void) @@ -64,7 +64,7 @@ void f2fs_trace_pid(struct page *page) if (radix_tree_preload(GFP_NOFS)) return; - spin_lock(&pids_lock); + mutex_lock(&pids_lock); p = radix_tree_lookup(&pids, pid); if (p == current) goto out; @@ -77,7 +77,7 @@ void f2fs_trace_pid(struct page *page) MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), pid, current->comm); out: - spin_unlock(&pids_lock); + mutex_unlock(&pids_lock); radix_tree_preload_end(); } @@ -122,7 +122,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush) void f2fs_build_trace_ios(void) { - spin_lock_init(&pids_lock); + mutex_init(&pids_lock); } #define PIDVEC_SIZE 128 @@ -150,7 +150,7 @@ void f2fs_destroy_trace_ios(void) pid_t next_pid = 0; unsigned int found; - spin_lock(&pids_lock); + mutex_lock(&pids_lock); while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) { unsigned idx; @@ -158,5 +158,5 @@ void f2fs_destroy_trace_ios(void) for (idx = 0; idx < found; idx++) radix_tree_delete(&pids, pid[idx]); } - spin_unlock(&pids_lock); + mutex_unlock(&pids_lock); } From 211a6fa04c56e8c57faca245880148bcb6419073 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 5 Dec 2017 12:07:47 +0800 Subject: [PATCH 29/72] f2fs: fix an error case of missing update inode page -Thread A Thread B -write_checkpoint -block_operations -f2fs_unlock_all -f2fs_sync_file -f2fs_write_inode -f2fs_inode_synced -f2fs_sync_inode_meta -sync_node_pages -set_page_drity In this case, if sudden power off without next new checkpoint, the last inode page update will lost. wb_writeback is same with fsync. Yunlei also reproduced the bug by: @@ -366,7 +366,7 @@ int update_inode(struct inode *inode, struct page *node_page) struct extent_tree *et = F2FS_I(inode)->extent_tree; f2fs_inode_synced(inode); - + msleep(10000); f2fs_wait_on_page_writeback(node_page, NODE, true); shell 1: shell2: dd if=/dev/zero of=./test bs=1M count=10 sync echo "hello" >> ./test fsync test // sleep 10s sync //return quickly echo c > /proc/sysrq-trigger Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/inode.c | 16 +++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b68166e28b15..df00affe1450 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2513,8 +2513,8 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); -int update_inode(struct inode *inode, struct page *node_page); -int update_inode_page(struct inode *inode); +void update_inode(struct inode *inode, struct page *node_page); +void update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); void f2fs_evict_inode(struct inode *inode); void handle_failed_inode(struct inode *inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b4c4f2b25304..234322889e65 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -360,14 +360,15 @@ retry: return inode; } -int update_inode(struct inode *inode, struct page *node_page) +void update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; struct extent_tree *et = F2FS_I(inode)->extent_tree; - f2fs_inode_synced(inode); - f2fs_wait_on_page_writeback(node_page, NODE, true); + set_page_dirty(node_page); + + f2fs_inode_synced(inode); ri = F2FS_INODE(node_page); @@ -426,14 +427,12 @@ int update_inode(struct inode *inode, struct page *node_page) if (inode->i_nlink == 0) clear_inline_node(node_page); - return set_page_dirty(node_page); } -int update_inode_page(struct inode *inode) +void update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *node_page; - int ret = 0; retry: node_page = get_node_page(sbi, inode->i_ino); if (IS_ERR(node_page)) { @@ -444,11 +443,10 @@ retry: } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } - return 0; + return; } - ret = update_inode(inode, node_page); + update_inode(inode, node_page); f2fs_put_page(node_page, 1); - return ret; } int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) From c39a1b348c4fe172729eff77c533dabc3c7cdaa7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 19 Dec 2017 19:16:34 -0800 Subject: [PATCH 30/72] f2fs: return error during fill_super Let's avoid BUG_ON during fill_super, when on-disk was totall corrupted. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 ++++++++++++---- fs/f2fs/segment.h | 22 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 40e1d20b8080..b03f65e5dfdc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3476,7 +3476,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) return restore_curseg_summaries(sbi); } -static void build_sit_entries(struct f2fs_sb_info *sbi) +static int build_sit_entries(struct f2fs_sb_info *sbi) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); @@ -3486,6 +3486,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; + int err = 0; do { readed = ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, @@ -3504,7 +3505,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); - check_block_count(sbi, start, &sit); + err = check_block_count(sbi, start, &sit); + if (err) + return err; seg_info_from_raw_sit(se, &sit); /* build discard map only one time */ @@ -3539,7 +3542,9 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) old_valid_blocks = se->valid_blocks; - check_block_count(sbi, start, &sit); + err = check_block_count(sbi, start, &sit); + if (err) + break; seg_info_from_raw_sit(se, &sit); if (f2fs_discard_en(sbi)) { @@ -3559,6 +3564,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) se->valid_blocks - old_valid_blocks; } up_read(&curseg->journal_rwsem); + return err; } static void init_free_segmap(struct f2fs_sb_info *sbi) @@ -3733,7 +3739,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi) return err; /* reinit free segmap based on SIT */ - build_sit_entries(sbi); + err = build_sit_entries(sbi); + if (err) + return err; init_free_segmap(sbi); err = build_dirty_segmap(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index d1d394cdf61d..71a2aaa286df 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -655,7 +655,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) /* * Summary block is always treated as an invalid block */ -static inline void check_block_count(struct f2fs_sb_info *sbi, +static inline int check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) { #ifdef CONFIG_F2FS_CHECK_FS @@ -677,11 +677,25 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, cur_pos = next_pos; is_valid = !is_valid; } while (cur_pos < sbi->blocks_per_seg); - BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); + + if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Mismatch valid blocks %d vs. %d", + GET_SIT_VBLOCKS(raw_sit), valid_blocks); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return -EINVAL; + } #endif /* check segment usage, and check boundary of a given segment number */ - f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg - || segno > TOTAL_SEGS(sbi) - 1); + if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg + || segno > TOTAL_SEGS(sbi) - 1)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Wrong valid blocks %d or segno %u", + GET_SIT_VBLOCKS(raw_sit), segno); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return -EINVAL; + } + return 0; } static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, From 0a007b97aad6e1700ef5c3815d14e88192cc1124 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 Dec 2017 08:09:44 -0800 Subject: [PATCH 31/72] f2fs: recover directory operations by fsync This fixes generic/342 which doesn't recover renamed file which was fsynced before. It will be done via another fsync on newly created file. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 ++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 3 +++ fs/f2fs/namei.c | 4 ++++ include/trace/events/f2fs.h | 3 ++- 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 724304dc6143..f00b5ed8c011 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -713,6 +713,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); + add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); + if (f2fs_has_inline_dentry(dir)) return f2fs_delete_inline_entry(dentry, page, dir, inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index df00affe1450..46d3d89f3ac5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -180,6 +180,7 @@ enum { ORPHAN_INO, /* for orphan ino list */ APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ + TRANS_DIR_INO, /* for trasactions dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; @@ -924,6 +925,7 @@ enum cp_reason_type { CP_NODE_NEED_CP, CP_FASTBOOT_MODE, CP_SPEC_LOG_NUM, + CP_RECOVER_DIR, }; enum iostat_type { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1db68f1bcd77..322aab9d91b5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -165,6 +165,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) cp_reason = CP_FASTBOOT_MODE; else if (sbi->active_logs == 2) cp_reason = CP_SPEC_LOG_NUM; + else if (need_dentry_mark(sbi, inode->i_ino) && + exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) + cp_reason = CP_RECOVER_DIR; return cp_reason; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index e3a0f4f65851..a885c6e659f8 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -935,6 +935,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_i_links_write(old_dir, false); } + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); f2fs_unlock_op(sbi); @@ -1089,6 +1090,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_mark_inode_dirty_sync(new_dir, false); + add_ino_entry(sbi, old_dir->i_ino, TRANS_DIR_INO); + add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); + f2fs_unlock_op(sbi); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 8f8dd42fa57b..06c87f9f720c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -147,7 +147,8 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_NO_SPC_ROLL, "no space roll forward" }, \ { CP_NODE_NEED_CP, "node needs cp" }, \ { CP_FASTBOOT_MODE, "fastboot mode" }, \ - { CP_SPEC_LOG_NUM, "log type is 2" }) + { CP_SPEC_LOG_NUM, "log type is 2" }, \ + { CP_RECOVER_DIR, "dir needs recovery" }) struct victim_sel_policy; struct f2fs_map_blocks; From d620439f25fad30e89cde2e10fbd26a1ec8d47a0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 28 Dec 2017 17:47:19 -0800 Subject: [PATCH 32/72] f2fs: fix missing error number for xattr operation This fixes generic/449 hang problem caused by no ENOSPC forever which should be returned by setxattr under disk full scenario. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 5e2b905cc0c4..600162f4ddbf 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -433,6 +433,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (F2FS_I(inode)->i_xattr_nid) { xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); if (IS_ERR(xpage)) { + err = PTR_ERR(xpage); alloc_nid_failed(sbi, new_nid); goto in_page_out; } @@ -443,6 +444,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, set_new_dnode(&dn, inode, NULL, NULL, new_nid); xpage = new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { + err = PTR_ERR(xpage); alloc_nid_failed(sbi, new_nid); goto in_page_out; } From b1ca321d1cd8f09965e7306ccf3998c7eb7e7b19 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 31 Dec 2017 16:26:38 -0800 Subject: [PATCH 33/72] f2fs: skip stop_checkpoint for user data writes We can give another chance to write user data, which can resolve generic/441. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7aca6ccd01f6..449b0aaa3905 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -111,7 +111,8 @@ static void f2fs_write_end_io(struct bio *bio) if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); - f2fs_stop_checkpoint(sbi, true); + if (type == F2FS_WB_CP_DATA) + f2fs_stop_checkpoint(sbi, true); } dec_page_count(sbi, type); clear_cold_data(page); From 6279398db76cb7dcdafe42b34273b43a5144e304 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Jan 2018 11:03:19 -0800 Subject: [PATCH 34/72] f2fs: enable quota at remount from r to w We have to enable quota only when remounting from read to write. Otherwise, we'll get remount failure. (e.g., write to write case) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f56230759b38..0a820ba55b10 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1265,7 +1265,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; - } else { + } else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) { /* dquot_resume needs RW */ sb->s_flags &= ~SB_RDONLY; if (sb_any_quota_suspended(sb)) { From d6d478a14bd3bc77afe813fa9c2ce8ebebf01b2d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 3 Jan 2018 17:30:19 +0800 Subject: [PATCH 35/72] f2fs: continue to do direct IO if we only preallocate partial blocks While doing direct IO, if we run out-of-space when we preallocate blocks, we should not return ENOSPC error directly, instead, we should continue to do following direct IO, which will keep directIO of f2fs acting like other filesystems. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 449b0aaa3905..a023863ef27f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -832,10 +832,12 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct f2fs_map_blocks map; + int flag; int err = 0; + bool direct_io = iocb->ki_flags & IOCB_DIRECT; /* convert inline data for Direct I/O*/ - if (iocb->ki_flags & IOCB_DIRECT) { + if (direct_io) { err = f2fs_convert_inline_inode(inode); if (err) return err; @@ -854,25 +856,29 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; map.m_seg_type = NO_CHECK_TYPE; - if (iocb->ki_flags & IOCB_DIRECT) { + if (direct_io) { map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint); - return f2fs_map_blocks(inode, &map, 1, - __force_buffered_io(inode, WRITE) ? - F2FS_GET_BLOCK_PRE_AIO : - F2FS_GET_BLOCK_PRE_DIO); + flag = __force_buffered_io(inode, WRITE) ? + F2FS_GET_BLOCK_PRE_AIO : + F2FS_GET_BLOCK_PRE_DIO; + goto map_blocks; } if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { err = f2fs_convert_inline_inode(inode); if (err) return err; } - if (!f2fs_has_inline_data(inode)) { - err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); - if (map.m_len > 0 && err == -ENOSPC) { - set_inode_flag(inode, FI_NO_PREALLOC); - err = 0; - } + if (f2fs_has_inline_data(inode)) return err; + + flag = F2FS_GET_BLOCK_PRE_AIO; + +map_blocks: + err = f2fs_map_blocks(inode, &map, 1, flag); + if (map.m_len > 0 && err == -ENOSPC) { + if (!direct_io) + set_inode_flag(inode, FI_NO_PREALLOC); + err = 0; } return err; } From 7f1a45a5b65c8a5a16cfb262d9470424d374488b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 3 Jan 2018 17:32:51 +0800 Subject: [PATCH 36/72] f2fs: clean up unneeded declaration Commit 6afc662e68b5 ("f2fs: support flexible inline xattr size") declared f2fs_sb_has_flexible_inline_xattr in f2fs.h for latter being used in get_inline_xattr_addrs, but in latter version, related code has been changed, leave f2fs_sb_has_flexible_inline_xattr w/o any users. Let's remove it for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 46d3d89f3ac5..7d3ba65ab638 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2446,7 +2446,6 @@ static inline int get_extra_isize(struct inode *inode) return F2FS_I(inode)->i_extra_isize / sizeof(__le32); } -static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb); static inline int get_inline_xattr_addrs(struct inode *inode) { return F2FS_I(inode)->i_inline_xattr_size; From f66c027eaddd355a55625eb487a6c6fe8d3a1fc6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Jan 2018 10:55:07 -0800 Subject: [PATCH 37/72] f2fs: show precise # of blocks that user/root can use Let's show precise # of blocks that user/root can use through bavail and bfree respectively. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0a820ba55b10..f1300cda6bfd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -994,20 +994,19 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - block_t total_count, user_block_count, start_count, ovp_count; + block_t total_count, user_block_count, start_count; u64 avail_node_count; total_count = le64_to_cpu(sbi->raw_super->block_count); user_block_count = sbi->user_block_count; start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr); - ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; buf->f_type = F2FS_SUPER_MAGIC; buf->f_bsize = sbi->blocksize; buf->f_blocks = total_count - start_count; - buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; - buf->f_bavail = user_block_count - valid_user_blocks(sbi) - + buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; + buf->f_bavail = buf->f_bfree; avail_node_count = sbi->total_node_count - sbi->nquota_files - F2FS_RESERVED_NODE_NUM; From 1eca05aa9d4d107a6f9e434b5041a775327a8a4b Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 3 Jan 2018 18:03:04 +0800 Subject: [PATCH 38/72] f2fs: update inode info to inode page for new file After checkpoint, 1. creat a new file A ,(with dirty inode && dirty inode page && xattr info) 2. backgroud wb write back file A inode page (without update from inode cache) 3. fsync file A, write back inode page of file A with inode cache info 4. sudden power off before new checkpoint In this case, recovery process will try to recover a zero inode page. Inline xattr flag of file A will be miss and xattr info will be taken as blkaddr index. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 35b079d98c17..b495a543819c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2207,7 +2207,9 @@ void recover_inline_xattr(struct inode *inode, struct page *page) f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage)); ri = F2FS_INODE(page); - if (!(ri->i_inline & F2FS_INLINE_XATTR)) { + if (ri->i_inline & F2FS_INLINE_XATTR) { + set_inode_flag(inode, FI_INLINE_XATTR); + } else { clear_inode_flag(inode, FI_INLINE_XATTR); goto update_inode; } From 2c1905042c8c3da45ec347d5397d1133ff30fce4 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 4 Jan 2018 15:02:02 +0800 Subject: [PATCH 39/72] f2fs: check segment type in __f2fs_replace_block In some case, the node blocks has wrong blkaddr whose segment type is NODE, e.g., recover inode has missing xattr flag and the blkaddr is in the xattr range. Since fsck.f2fs does not check the recovery nodes, this will cause __f2fs_replace_block change the curseg of node and do the update_sit_entry(sbi, new_blkaddr, 1) with no next_blkoff refresh, as a result, when recovery process write checkpoint and sync nodes, the next_blkoff of curseg is used in the segment bit map, then it will cause f2fs_bug_on. So let's check segment type in __f2fs_replace_block. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b03f65e5dfdc..f2842a8a1341 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2739,6 +2739,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } } + f2fs_bug_on(sbi, !IS_DATASEG(type)); curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); From 7e65be49ed94f89cbf5add7dadf6e338f0cc6e7b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 27 Dec 2017 15:05:52 -0800 Subject: [PATCH 40/72] f2fs: add reserved blocks for root user This patch allows root to reserve some blocks via mount option. "-o reserve_root=N" means N x 4KB-sized blocks for root only. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 20 ++++++++++++++++---- fs/f2fs/super.c | 37 ++++++++++++++++++++++++++++++++++++- fs/f2fs/sysfs.c | 3 ++- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7d3ba65ab638..541709042d53 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_PRJQUOTA 0x00200000 #define F2FS_MOUNT_QUOTA 0x00400000 #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 +#define F2FS_MOUNT_RESERVE_ROOT 0x01000000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -1105,6 +1106,7 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ + block_t root_reserved_blocks; /* root reserved blocks */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1583,11 +1585,17 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->total_valid_block_count += (block_t)(*count); avail_user_block_count = sbi->user_block_count - sbi->current_reserved_blocks; + + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + avail_user_block_count -= sbi->root_reserved_blocks; + if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; + if (diff > *count) + diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count = avail_user_block_count; + sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); percpu_counter_sub(&sbi->alloc_valid_block_count, diff); @@ -1776,9 +1784,13 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); - valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->current_reserved_blocks > - sbi->user_block_count)) { + valid_block_count = sbi->total_valid_block_count + + sbi->current_reserved_blocks + 1; + + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + valid_block_count += sbi->root_reserved_blocks; + + if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f1300cda6bfd..4904d1644052 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -107,6 +107,7 @@ enum { Opt_noextent_cache, Opt_noinline_data, Opt_data_flush, + Opt_reserve_root, Opt_mode, Opt_io_size_bits, Opt_fault_injection, @@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = { {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, + {Opt_reserve_root, "reserve_root=%u"}, {Opt_mode, "mode=%s"}, {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, @@ -191,6 +193,19 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) va_end(args); } +static inline void limit_reserve_root(struct f2fs_sb_info *sbi) +{ + block_t limit = (sbi->user_block_count << 1) / 1000; + + /* limit is 0.2% */ + if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) { + sbi->root_reserved_blocks = limit; + f2fs_msg(sbi->sb, KERN_INFO, + "Reduce reserved blocks for root = %u", + sbi->root_reserved_blocks); + } +} + static void init_once(void *foo) { struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo; @@ -488,6 +503,18 @@ static int parse_options(struct super_block *sb, char *options) case Opt_data_flush: set_opt(sbi, DATA_FLUSH); break; + case Opt_reserve_root: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (test_opt(sbi, RESERVE_ROOT)) { + f2fs_msg(sb, KERN_INFO, + "Preserve previous reserve_root=%u", + sbi->root_reserved_blocks); + } else { + sbi->root_reserved_blocks = arg; + set_opt(sbi, RESERVE_ROOT); + } + break; case Opt_mode: name = match_strdup(&args[0]); @@ -1006,7 +1033,10 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; - buf->f_bavail = buf->f_bfree; + if (buf->f_bfree > sbi->root_reserved_blocks) + buf->f_bavail = buf->f_bfree - sbi->root_reserved_blocks; + else + buf->f_bavail = 0; avail_node_count = sbi->total_node_count - sbi->nquota_files - F2FS_RESERVED_NODE_NUM; @@ -1135,6 +1165,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (test_opt(sbi, LFS)) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); + if (test_opt(sbi, RESERVE_ROOT)) + seq_printf(seq, ",reserve_root=%u", + sbi->root_reserved_blocks); if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -1333,6 +1366,7 @@ skip: sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); + limit_reserve_root(sbi); return 0; restore_gc: if (need_restart_gc) { @@ -2569,6 +2603,7 @@ try_onemore: sbi->last_valid_block_count = sbi->total_valid_block_count; sbi->reserved_blocks = 0; sbi->current_reserved_blocks = 0; + limit_reserve_root(sbi); for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 93c3364250dd..ab6028c332aa 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -162,7 +162,8 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); - if (t > (unsigned long)sbi->user_block_count) { + if (t > (unsigned long)(sbi->user_block_count - + sbi->root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } From 94b1e10e745424fe6183023ff5a525bf7d12f779 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 5 Jan 2018 09:41:20 +0000 Subject: [PATCH 41/72] f2fs: make local functions static Fixes the following sparse warnings: fs/f2fs/segment.c:887:6: warning: symbol '__check_sit_bitmap' was not declared. Should it be static? fs/f2fs/segment.c:1327:6: warning: symbol 'f2fs_wait_discard_bio' was not declared. Should it be static? fs/f2fs/super.c:1661:5: warning: symbol 'f2fs_get_projid' was not declared. Should it be static? Signed-off-by: Wei Yongjun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- fs/f2fs/super.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f2842a8a1341..b4b90980dc7b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -884,7 +884,7 @@ static void f2fs_submit_discard_endio(struct bio *bio) bio_put(bio); } -void __check_sit_bitmap(struct f2fs_sb_info *sbi, +static void __check_sit_bitmap(struct f2fs_sb_info *sbi, block_t start, block_t end) { #ifdef CONFIG_F2FS_CHECK_FS @@ -1324,7 +1324,7 @@ static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, } /* This should be covered by global mutex, &sit_i->sentry_lock */ -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4904d1644052..8b86a483ddd7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1691,7 +1691,7 @@ void f2fs_quota_off_umount(struct super_block *sb) f2fs_quota_off(sb, type); } -int f2fs_get_projid(struct inode *inode, kprojid_t *projid) +static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { *projid = F2FS_I(inode)->i_projid; return 0; From 49c60c67d235647c0bec7c68982539f3a6e8bbcd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jan 2018 18:48:33 +0800 Subject: [PATCH 42/72] f2fs: avoid high cpu usage in discard thread We take very long time to finish generic/476, this is because we will check consistence of all discard entries in global rb tree while traversing all different granularity pending lists, even when the list is empty, in order to avoid that unneeded overhead, we have to skip the check when coming up an empty list. generic/476 time consumption: cost Before patch & w/o consistence check 57s Before patch & w/ consistence check 1426s After patch 78s Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b4b90980dc7b..35009dbacd46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1204,6 +1204,8 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, pend_list = &dcc->pend_list[i]; mutex_lock(&dcc->cmd_lock); + if (list_empty(pend_list)) + goto next; f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { @@ -1222,6 +1224,7 @@ skip: break; } blk_finish_plug(&plug); +next: mutex_unlock(&dcc->cmd_lock); if (iter >= dpolicy->max_requests) From bffa8d3b0040446f18961ab7de8e73fdca7fe4f0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Jan 2018 18:48:34 +0800 Subject: [PATCH 43/72] f2fs: remove unused pend_list_tag In commit 78997b569f56 ("f2fs: split discard policy"), we have get rid of using pend_list_tag field in struct discard_cmd_control, but forgot to remove it, now do it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 541709042d53..fdbaa8fcec2f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -267,7 +267,6 @@ struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head entry_list; /* 4KB discard entry list */ struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ - unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ struct list_head wait_list; /* store on-flushing entries */ struct list_head fstrim_list; /* in-flight discard from fstrim */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ From 578c647879f74c333d20762375fd970907f2e97c Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 9 Jan 2018 19:33:39 +0800 Subject: [PATCH 44/72] f2fs: implement cgroup writeback support Cgroup writeback requires explicit support from the filesystem. f2fs's data and node writeback IOs go through __write_data_page, which sets fio for submiting IOs. So, we add io_wbc for fio, associate bios with blkcg by invoking wbc_init_bio() and account IOs issuing by wbc_account_io(). In addtion, f2fs_fill_super() is updated to set SB_I_CGROUPWB. Meta writeback IOs is left alone by this patch and will always be attributed to the root cgroup. The results show that f2fs can throttle writeback nicely for data writing and file creating. Reviewed-by: Chao Yu Signed-off-by: Yufen Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++++++-- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 1 + fs/f2fs/super.c | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a023863ef27f..304b899a6892 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -170,6 +170,7 @@ static bool __same_bdev(struct f2fs_sb_info *sbi, * Low-level block read/write IO operations. */ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, + struct writeback_control *wbc, int npages, bool is_read) { struct bio *bio; @@ -179,6 +180,8 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, f2fs_target_device(sbi, blk_addr, bio); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; bio->bi_private = is_read ? NULL : sbi; + if (wbc) + wbc_init_bio(wbc, bio); return bio; } @@ -374,7 +377,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) f2fs_trace_ios(fio, 0); /* Allocate a new bio */ - bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->op)); + bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, + 1, is_read_io(fio->op)); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -436,7 +440,7 @@ alloc_new: dec_page_count(sbi, WB_DATA_TYPE(bio_page)); goto out_fail; } - io->bio = __bio_alloc(sbi, fio->new_blkaddr, + io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, BIO_MAX_PAGES, false); io->fio = *fio; } @@ -446,6 +450,9 @@ alloc_new: goto alloc_new; } + if (fio->io_wbc) + wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE); + io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); @@ -1529,6 +1536,7 @@ static int __write_data_page(struct page *page, bool *submitted, .submitted = false, .need_lock = LOCK_RETRY, .io_type = io_type, + .io_wbc = wbc, }; trace_f2fs_writepage(page, DATA); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fdbaa8fcec2f..757c1ef742ea 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -961,6 +961,7 @@ struct f2fs_io_info { int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ enum iostat_type io_type; /* io type */ + struct writeback_control *io_wbc; /* writeback control */ }; #define is_read_io(rw) ((rw) == READ) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b495a543819c..ea29314ca8a3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1335,6 +1335,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .encrypted_page = NULL, .submitted = false, .io_type = io_type, + .io_wbc = wbc, }; trace_f2fs_writepage(page, NODE); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8b86a483ddd7..ec489b205443 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2518,6 +2518,7 @@ try_onemore: sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + sb->s_iflags |= SB_I_CGROUPWB; /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; From 7c2e59632b846a9ea08a2ac985c651b752e08e3c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 4 Jan 2018 21:36:09 -0800 Subject: [PATCH 45/72] f2fs: add resgid and resuid to reserve root blocks This patch adds mount options to reserve some blocks via resgid=%u,resuid=%u. It only activates with reserve_root=%u. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 27 +++++++++++++++++++++++++-- fs/f2fs/super.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 757c1ef742ea..e9299463be20 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -131,6 +132,12 @@ struct f2fs_mount_info { #define F2FS_CLEAR_FEATURE(sb, mask) \ (F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)) +/* + * Default values for user and/or group using reserved blocks + */ +#define F2FS_DEF_RESUID 0 +#define F2FS_DEF_RESGID 0 + /* * For checkpoint manager */ @@ -1107,6 +1114,8 @@ struct f2fs_sb_info { block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ block_t root_reserved_blocks; /* root reserved blocks */ + kuid_t s_resuid; /* reserved blocks for uid */ + kgid_t s_resgid; /* reserved blocks for gid */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1556,6 +1565,20 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } +static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi) +{ + if (!test_opt(sbi, RESERVE_ROOT)) + return false; + if (capable(CAP_SYS_RESOURCE)) + return true; + if (uid_eq(sbi->s_resuid, current_fsuid())) + return true; + if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && + in_group_p(sbi->s_resgid)) + return true; + return false; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) @@ -1586,7 +1609,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = sbi->user_block_count - sbi->current_reserved_blocks; - if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + if (!__allow_reserved_blocks(sbi)) avail_user_block_count -= sbi->root_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { @@ -1787,7 +1810,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, valid_block_count = sbi->total_valid_block_count + sbi->current_reserved_blocks + 1; - if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + if (!__allow_reserved_blocks(sbi)) valid_block_count += sbi->root_reserved_blocks; if (unlikely(valid_block_count > sbi->user_block_count)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ec489b205443..2bfd09812d2a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -108,6 +108,8 @@ enum { Opt_noinline_data, Opt_data_flush, Opt_reserve_root, + Opt_resgid, + Opt_resuid, Opt_mode, Opt_io_size_bits, Opt_fault_injection, @@ -159,6 +161,8 @@ static match_table_t f2fs_tokens = { {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, {Opt_reserve_root, "reserve_root=%u"}, + {Opt_resgid, "resgid=%u"}, + {Opt_resuid, "resuid=%u"}, {Opt_mode, "mode=%s"}, {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, @@ -204,6 +208,15 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) "Reduce reserved blocks for root = %u", sbi->root_reserved_blocks); } + if (!test_opt(sbi, RESERVE_ROOT) && + (!uid_eq(sbi->s_resuid, + make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || + !gid_eq(sbi->s_resgid, + make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) + f2fs_msg(sbi->sb, KERN_INFO, + "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", + from_kuid_munged(&init_user_ns, sbi->s_resuid), + from_kgid_munged(&init_user_ns, sbi->s_resgid)); } static void init_once(void *foo) @@ -336,6 +349,8 @@ static int parse_options(struct super_block *sb, char *options) substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; + kuid_t uid; + kgid_t gid; #ifdef CONFIG_QUOTA int ret; #endif @@ -515,6 +530,28 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, RESERVE_ROOT); } break; + case Opt_resuid: + if (args->from && match_int(args, &arg)) + return -EINVAL; + uid = make_kuid(current_user_ns(), arg); + if (!uid_valid(uid)) { + f2fs_msg(sb, KERN_ERR, + "Invalid uid value %d", arg); + return -EINVAL; + } + sbi->s_resuid = uid; + break; + case Opt_resgid: + if (args->from && match_int(args, &arg)) + return -EINVAL; + gid = make_kgid(current_user_ns(), arg); + if (!gid_valid(gid)) { + f2fs_msg(sb, KERN_ERR, + "Invalid gid value %d", arg); + return -EINVAL; + } + sbi->s_resgid = gid; + break; case Opt_mode: name = match_strdup(&args[0]); @@ -1166,8 +1203,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); if (test_opt(sbi, RESERVE_ROOT)) - seq_printf(seq, ",reserve_root=%u", - sbi->root_reserved_blocks); + seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", + sbi->root_reserved_blocks, + from_kuid_munged(&init_user_ns, sbi->s_resuid), + from_kgid_munged(&init_user_ns, sbi->s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -2455,6 +2494,9 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; + sbi->s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); + sbi->s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sb)) sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, From f1d2564a7cc9d2f49399094cfe3c46c9d4930b7b Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Wed, 10 Jan 2018 16:49:10 +0900 Subject: [PATCH 46/72] f2fs: handle newly created page when revoking inmem pages When committing inmem pages is successful, we revoke already committed blocks in __revoke_inmem_pages() and finally replace the committed ones with the old blocks using f2fs_replace_block(). However, if the committed block was newly created one, the address of the old block is NEW_ADDR and __f2fs_replace_block() cannot handle NEW_ADDR as new_blkaddr properly and a kernel panic occurrs. Signed-off-by: Daeho Jeong Tested-by: Shu Tan Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 35009dbacd46..2e8e054db49d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -248,7 +248,11 @@ retry: goto next; } get_node_info(sbi, dn.nid, &ni); - f2fs_replace_block(sbi, &dn, dn.data_blkaddr, + if (cur->old_addr == NEW_ADDR) { + invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_update_data_blkaddr(&dn, NEW_ADDR); + } else + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, cur->old_addr, ni.version, true, true); f2fs_put_dnode(&dn); } From 25a912e51af7c0761a6d3424640901ded517e201 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 10 Jan 2018 18:18:51 +0800 Subject: [PATCH 47/72] f2fs: fix to caclulate required free section correctly When calculating required free section during file defragmenting, we should skip holes in file, otherwise we will probably fail to defrag sparse file with large size. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 322aab9d91b5..05779077cbf2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2125,10 +2125,12 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, continue; } - if (blk_end && blk_end != map.m_pblk) { + if (blk_end && blk_end != map.m_pblk) fragmented = true; - break; - } + + /* record total count of block that we're going to move */ + total += map.m_len; + blk_end = map.m_pblk + map.m_len; map.m_lblk += map.m_len; @@ -2137,10 +2139,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - map.m_lblk = pg_start; - map.m_len = pg_end - pg_start; - - sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); /* * make sure there are enough free section for LFS allocation, this can @@ -2152,6 +2151,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; } + map.m_lblk = pg_start; + map.m_len = pg_end - pg_start; + total = 0; + while (map.m_lblk < pg_end) { pgoff_t idx; int cnt = 0; From 7dff55d27e07c571b6c532b714c22d7ce9fba1e0 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 11 Jan 2018 14:19:32 +0800 Subject: [PATCH 48/72] f2fs: check node page again in write end io Check node page again in write end io in case of data corruption during inflght IO. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 304b899a6892..31add841ec39 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -114,6 +114,10 @@ static void f2fs_write_end_io(struct bio *bio) if (type == F2FS_WB_CP_DATA) f2fs_stop_checkpoint(sbi, true); } + + f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && + page->index != nid_of_node(page)); + dec_page_count(sbi, type); clear_cold_data(page); end_page_writeback(page); From f1b43d4cd5f2563f0f1bb2c84eff94faa4c2853b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 11 Jan 2018 14:37:35 +0800 Subject: [PATCH 49/72] f2fs: fix to cover f2fs_inline_data_fiemap with inode_lock This patch fix to cover f2fs_inline_data_fiemap with inode_lock in order to make that interface avoiding race with mapping change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 31add841ec39..664fe0aa18bb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1154,14 +1154,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (ret) return ret; + inode_lock(inode); + if (f2fs_has_inline_data(inode)) { ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); if (ret != -EAGAIN) - return ret; + goto out; } - inode_lock(inode); - if (logical_to_blk(inode, len) == 0) len = blk_to_logical(inode, 1); From 442a9dbd577e38211d296f35443b5e257bb5a9b3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 11 Jan 2018 14:39:57 +0800 Subject: [PATCH 50/72] f2fs: support FIEMAP_FLAG_XATTR This patch enables ->fiemap to handle FIEMAP_FLAG_XATTR flag for xattr mapping info lookup purpose. It makes f2fs passing generic/425 test in fstest. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 664fe0aa18bb..cde2a3264b4c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1140,6 +1140,68 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) return (blk << inode->i_blkbits); } +static int f2fs_xattr_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct page *page; + struct node_info ni; + __u64 phys = 0, len; + __u32 flags; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + int err = 0; + + if (f2fs_has_inline_xattr(inode)) { + int offset; + + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), + inode->i_ino, false); + if (!page) + return -ENOMEM; + + get_node_info(sbi, inode->i_ino, &ni); + + phys = (__u64)blk_to_logical(inode, ni.blk_addr); + offset = offsetof(struct f2fs_inode, i_addr) + + sizeof(__le32) * (DEF_ADDRS_PER_INODE - + F2FS_INLINE_XATTR_ADDRS(inode)); + + phys += offset; + len = inline_xattr_size(inode); + + f2fs_put_page(page, 1); + + flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; + + if (!xnid) + flags |= FIEMAP_EXTENT_LAST; + + err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + if (err || err == 1) + return err; + } + + if (xnid) { + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false); + if (!page) + return -ENOMEM; + + get_node_info(sbi, xnid, &ni); + + phys = (__u64)blk_to_logical(inode, ni.blk_addr); + len = inode->i_sb->s_blocksize; + + f2fs_put_page(page, 1); + + flags = FIEMAP_EXTENT_LAST; + } + + if (phys) + err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); + + return (err < 0 ? err : 0); +} + int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -1150,12 +1212,17 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u32 flags = 0; int ret = 0; - ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR); if (ret) return ret; inode_lock(inode); + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { + ret = f2fs_xattr_fiemap(inode, fieinfo); + goto out; + } + if (f2fs_has_inline_data(inode)) { ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); if (ret != -EAGAIN) From 9ac1e2d88d076aa1ae9e33d44a9bbc8ae3bfa791 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 11 Jan 2018 11:26:19 +0900 Subject: [PATCH 51/72] f2fs: prevent newly created inode from being dirtied incorrectly Now, we invoke f2fs_mark_inode_dirty_sync() to make an inode dirty in advance of creating a new node page for the inode. By this, some inodes whose node page is not created yet can be linked into the global dirty list. If the checkpoint is executed at this moment, the inode will be written back by writeback_single_inode() and finally update_inode_page() will fail to detach the inode from the global dirty list because the inode doesn't have a node page. The problem is that the inode's state in VFS layer will become clean after execution of writeback_single_inode() and it's still linked in the global dirty list of f2fs and this will cause a kernel panic. So, we will prevent the newly created inode from being dirtied during the FI_NEW_INODE flag of the inode is set. We will make it dirty right after the flag is cleared. Signed-off-by: Daeho Jeong Signed-off-by: Youngjin Gil Tested-by: Hobin Woo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/inode.c | 3 +++ fs/f2fs/namei.c | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e9299463be20..1b2b0acbc83d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2150,6 +2150,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_XATTR: case FI_INLINE_DATA: case FI_INLINE_DENTRY: + case FI_NEW_INODE: if (set) return; case FI_DATA_EXIST: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 234322889e65..1dc77a40d0ad 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -22,6 +22,9 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) { + if (is_inode_flag_set(inode, FI_NEW_INODE)) + return; + if (f2fs_inode_dirtied(inode, sync)) return; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a885c6e659f8..3ee97ba9d2d7 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -74,12 +74,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (err) goto fail_drop; + set_inode_flag(inode, FI_NEW_INODE); + /* If the directory encrypted, then we should encrypt the inode. */ if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); - set_inode_flag(inode, FI_NEW_INODE); - if (f2fs_sb_has_extra_attr(sbi->sb)) { set_inode_flag(inode, FI_EXTRA_ATTR); F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE; From 1ad71a27124caf0b68ddd3c92be01aa2b2a72b2a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Dec 2017 16:25:39 -0800 Subject: [PATCH 52/72] f2fs: add an ioctl to disable GC for specific file This patch gives a flag to disable GC on given file, which would be useful, when user wants to keep its block map. It also conducts in-place-update for dontmove file. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 + fs/f2fs/f2fs.h | 29 +++++++++++++- fs/f2fs/file.c | 83 +++++++++++++++++++++++++++++++++++++++++ fs/f2fs/gc.c | 11 ++++++ fs/f2fs/gc.h | 2 + fs/f2fs/sysfs.c | 2 + include/linux/f2fs_fs.h | 9 ++++- 7 files changed, 136 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cde2a3264b4c..945f15607484 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1479,6 +1479,8 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; + if (f2fs_is_pinned_file(inode)) + return true; if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) return false; if (is_cold_data(fio->page)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1b2b0acbc83d..d5090b4d2633 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -356,6 +356,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GARBAGE_COLLECT_RANGE _IOW(F2FS_IOCTL_MAGIC, 11, \ struct f2fs_gc_range) #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) +#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) +#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -593,7 +595,10 @@ struct f2fs_inode_info { unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - unsigned int i_current_depth; /* use only in directory structure */ + union { + unsigned int i_current_depth; /* only for directory depth */ + unsigned short i_gc_failures; /* only for regular file */ + }; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ @@ -1142,6 +1147,9 @@ struct f2fs_sb_info { /* threshold for converting bg victims for fg */ u64 fggc_threshold; + /* threshold for gc trials on pinned files */ + u64 gc_pin_file_threshold; + /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -2141,6 +2149,7 @@ enum { FI_HOT_DATA, /* indicate file is hot */ FI_EXTRA_ATTR, /* indicate file has extra attribute */ FI_PROJ_INHERIT, /* indicate file inherits projectid */ + FI_PIN_FILE, /* indicate file should not be gced */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2155,6 +2164,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; case FI_DATA_EXIST: case FI_INLINE_DOTS: + case FI_PIN_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -2235,6 +2245,13 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) f2fs_mark_inode_dirty_sync(inode, true); } +static inline void f2fs_i_gc_failures_write(struct inode *inode, + unsigned int count) +{ + F2FS_I(inode)->i_gc_failures = count; + f2fs_mark_inode_dirty_sync(inode, true); +} + static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) { F2FS_I(inode)->i_xattr_nid = xnid; @@ -2263,6 +2280,8 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DOTS, &fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, &fi->flags); + if (ri->i_inline & F2FS_PIN_FILE) + set_bit(FI_PIN_FILE, &fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -2281,6 +2300,8 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + ri->i_inline |= F2FS_PIN_FILE; } static inline int f2fs_has_extra_attr(struct inode *inode) @@ -2326,6 +2347,11 @@ static inline int f2fs_has_inline_dots(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DOTS); } +static inline bool f2fs_is_pinned_file(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_PIN_FILE); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(inode, FI_ATOMIC_FILE); @@ -2540,6 +2566,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void truncate_data_blocks_range(struct dnode_of_data *dn, int count); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int f2fs_pin_file_control(struct inode *inode, bool inc); /* * inode.c diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 05779077cbf2..493457a200b1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2672,6 +2672,83 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) return 0; } +int f2fs_pin_file_control(struct inode *inode, bool inc) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + /* Use i_gc_failures for normal file as a risk signal. */ + if (inc) + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + + if (fi->i_gc_failures > sbi->gc_pin_file_threshold) { + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: Enable GC = ino %lx after %x GC trials\n", + __func__, inode->i_ino, fi->i_gc_failures); + clear_inode_flag(inode, FI_PIN_FILE); + return -EAGAIN; + } + return 0; +} + +static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin; + int ret = 0; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(pin, (__u32 __user *)arg)) + return -EFAULT; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + inode_lock(inode); + + if (!pin) { + clear_inode_flag(inode, FI_PIN_FILE); + F2FS_I(inode)->i_gc_failures = 1; + goto done; + } + + if (f2fs_pin_file_control(inode, false)) { + ret = -EAGAIN; + goto out; + } + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out; + + set_inode_flag(inode, FI_PIN_FILE); + ret = F2FS_I(inode)->i_gc_failures; +done: + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); +out: + inode_unlock(inode); + mnt_drop_write_file(filp); + return ret; +} + +static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 pin = 0; + + if (is_inode_flag_set(inode, FI_PIN_FILE)) + pin = F2FS_I(inode)->i_gc_failures; + return put_user(pin, (u32 __user *)arg); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -2722,6 +2799,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_fsgetxattr(filp, arg); case F2FS_IOC_FSSETXATTR: return f2fs_ioc_fssetxattr(filp, arg); + case F2FS_IOC_GET_PIN_FILE: + return f2fs_ioc_get_pin_file(filp, arg); + case F2FS_IOC_SET_PIN_FILE: + return f2fs_ioc_set_pin_file(filp, arg); default: return -ENOTTY; } @@ -2797,6 +2878,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_FEATURES: case F2FS_IOC_FSGETXATTR: case F2FS_IOC_FSSETXATTR: + case F2FS_IOC_GET_PIN_FILE: + case F2FS_IOC_SET_PIN_FILE: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d844dcb80570..33e79697e41c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -624,6 +624,11 @@ static void move_data_block(struct inode *inode, block_t bidx, if (f2fs_is_atomic_file(inode)) goto out; + if (f2fs_is_pinned_file(inode)) { + f2fs_pin_file_control(inode, true); + goto out; + } + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); if (err) @@ -720,6 +725,11 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, if (f2fs_is_atomic_file(inode)) goto out; + if (f2fs_is_pinned_file(inode)) { + if (gc_type == FG_GC) + f2fs_pin_file_control(inode, true); + goto out; + } if (gc_type == BG_GC) { if (PageWriteback(page)) @@ -1091,6 +1101,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi) sbi->fggc_threshold = div64_u64((main_count - ovp_count) * BLKS_PER_SEC(sbi), (main_count - resv_count)); + sbi->gc_pin_file_threshold = DEF_GC_FAILED_PINNED_FILES; /* give warm/cold data area from slower device */ if (sbi->s_ndevs && sbi->segs_per_sec == 1) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 9325191fab2d..b0045d4c8d1e 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -20,6 +20,8 @@ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ +#define DEF_GC_FAILED_PINNED_FILES 2048 + /* Search max. number of dirty segments to select a victim segment */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ab6028c332aa..41887e6ec1b3 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -301,6 +301,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -349,6 +350,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(idle_interval), ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), + ATTR_LIST(gc_pin_file_thresh), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 43e98d30d2df..cfb522e6affc 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -212,6 +212,7 @@ struct f2fs_extent { #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ +#define F2FS_PIN_FILE 0x40 /* file should not be gced */ struct f2fs_inode { __le16 i_mode; /* file mode */ @@ -229,7 +230,13 @@ struct f2fs_inode { __le32 i_ctime_nsec; /* change time in nano scale */ __le32 i_mtime_nsec; /* modification time in nano scale */ __le32 i_generation; /* file version (for NFS) */ - __le32 i_current_depth; /* only for directory depth */ + union { + __le32 i_current_depth; /* only for directory depth */ + __le16 i_gc_failures; /* + * # of gc failures on pinned file. + * only for regular files. + */ + }; __le32 i_xattr_nid; /* nid to save xattr */ __le32 i_flags; /* file attributes */ __le32 i_pino; /* parent inode number */ From c4020b2da4c9e84d63e30ce5a85dc287507f0e60 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 11 Jan 2018 14:42:30 +0800 Subject: [PATCH 53/72] f2fs: support F2FS_IOC_PRECACHE_EXTENTS This patch introduces a new ioctl F2FS_IOC_PRECACHE_EXTENTS to precache extent info like ext4, in order to gain better performance during triggering AIO by eliminating synchronous waiting of mapping info. Referred commit: 7869a4a6c5ca ("ext4: add support for extent pre-caching") In addition, with newly added extent precache abilitiy, this patch add to support FIEMAP_FLAG_CACHE in ->fiemap. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 4 ++++ fs/f2fs/file.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 945f15607484..28a3328f1617 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -865,6 +865,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_len = 0; map.m_next_pgofs = NULL; + map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; if (direct_io) { @@ -931,6 +932,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, blkcnt_t prealloc; struct extent_info ei = {0,0,0}; block_t blkaddr; + unsigned int start_pgofs; if (!maxblocks) return 0; @@ -946,6 +948,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); map->m_flags = F2FS_MAP_MAPPED; + if (map->m_next_extent) + *map->m_next_extent = pgofs + map->m_len; goto out; } @@ -964,10 +968,14 @@ next_dnode: if (map->m_next_pgofs) *map->m_next_pgofs = get_next_page_offset(&dn, pgofs); + if (map->m_next_extent) + *map->m_next_extent = + get_next_page_offset(&dn, pgofs); } goto unlock_out; } + start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; end_offset = ADDRS_PER_PAGE(dn.node_page, inode); @@ -1001,6 +1009,8 @@ next_block: map->m_pblk = 0; goto sync_out; } + if (flag == F2FS_GET_BLOCK_PRECACHE) + goto sync_out; if (flag == F2FS_GET_BLOCK_FIEMAP && blkaddr == NULL_ADDR) { if (map->m_next_pgofs) @@ -1059,6 +1069,16 @@ skip: else if (dn.ofs_in_node < end_offset) goto next_block; + if (flag == F2FS_GET_BLOCK_PRECACHE) { + if (map->m_flags & F2FS_MAP_MAPPED) { + unsigned int ofs = start_pgofs - map->m_lblk; + + f2fs_update_extent_cache_range(&dn, + start_pgofs, map->m_pblk + ofs, + map->m_len - ofs); + } + } + f2fs_put_dnode(&dn); if (create) { @@ -1068,6 +1088,17 @@ skip: goto next_dnode; sync_out: + if (flag == F2FS_GET_BLOCK_PRECACHE) { + if (map->m_flags & F2FS_MAP_MAPPED) { + unsigned int ofs = start_pgofs - map->m_lblk; + + f2fs_update_extent_cache_range(&dn, + start_pgofs, map->m_pblk + ofs, + map->m_len - ofs); + } + if (map->m_next_extent) + *map->m_next_extent = pgofs + 1; + } f2fs_put_dnode(&dn); unlock_out: if (create) { @@ -1089,6 +1120,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, map.m_lblk = iblock; map.m_len = bh->b_size >> inode->i_blkbits; map.m_next_pgofs = next_pgofs; + map.m_next_extent = NULL; map.m_seg_type = seg_type; err = f2fs_map_blocks(inode, &map, create, flag); @@ -1212,6 +1244,12 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u32 flags = 0; int ret = 0; + if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { + ret = f2fs_precache_extents(inode); + if (ret) + return ret; + } + ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR); if (ret) return ret; @@ -1313,6 +1351,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, map.m_len = 0; map.m_flags = 0; map.m_next_pgofs = NULL; + map.m_next_extent = NULL; map.m_seg_type = NO_CHECK_TYPE; for (; nr_pages; nr_pages--) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d5090b4d2633..7a85298a74eb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -358,6 +358,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, __u32) #define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) +#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -553,6 +554,7 @@ struct f2fs_map_blocks { unsigned int m_len; unsigned int m_flags; pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */ + pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; }; @@ -563,6 +565,7 @@ enum { F2FS_GET_BLOCK_BMAP, F2FS_GET_BLOCK_PRE_DIO, F2FS_GET_BLOCK_PRE_AIO, + F2FS_GET_BLOCK_PRECACHE, }; /* @@ -2564,6 +2567,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, int f2fs_setattr(struct dentry *dentry, struct iattr *attr); int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_precache_extents(struct inode *inode); long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int f2fs_pin_file_control(struct inode *inode, bool inc); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 493457a200b1..58c9e3d8b8ec 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1423,7 +1423,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_seg_type = NO_CHECK_TYPE }; + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; pgoff_t pg_end; loff_t new_size = i_size_read(inode); loff_t off_end; @@ -2072,7 +2072,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_seg_type = NO_CHECK_TYPE }; + .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; struct extent_info ei = {0,0,0}; pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; @@ -2749,6 +2749,43 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) return put_user(pin, (u32 __user *)arg); } +int f2fs_precache_extents(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_map_blocks map; + pgoff_t m_next_extent; + loff_t end; + int err; + + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + return -EOPNOTSUPP; + + map.m_lblk = 0; + map.m_next_pgofs = NULL; + map.m_next_extent = &m_next_extent; + map.m_seg_type = NO_CHECK_TYPE; + end = F2FS_I_SB(inode)->max_file_blocks; + + while (map.m_lblk < end) { + map.m_len = end - map.m_lblk; + + down_write(&fi->dio_rwsem[WRITE]); + err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); + up_write(&fi->dio_rwsem[WRITE]); + if (err) + return err; + + map.m_lblk = m_next_extent; + } + + return err; +} + +static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) +{ + return f2fs_precache_extents(file_inode(filp)); +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -2803,6 +2840,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_get_pin_file(filp, arg); case F2FS_IOC_SET_PIN_FILE: return f2fs_ioc_set_pin_file(filp, arg); + case F2FS_IOC_PRECACHE_EXTENTS: + return f2fs_ioc_precache_extents(filp, arg); default: return -ENOTTY; } @@ -2880,6 +2919,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_FSSETXATTR: case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: + case F2FS_IOC_PRECACHE_EXTENTS: break; default: return -ENOIOCTLCMD; From f3d98e74fcddb23dba00a88145272fc8223baaee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 10 Jan 2018 18:18:52 +0800 Subject: [PATCH 54/72] f2fs: speed up defragment on sparse file We have supported to get next page offset with valid mapping crossing hole in f2fs_map_blocks, utilizing it to speed up defragment on sparse file. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/file.c | 11 ++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 28a3328f1617..81fd4823a071 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1017,8 +1017,12 @@ next_block: *map->m_next_pgofs = pgofs + 1; goto sync_out; } - if (flag != F2FS_GET_BLOCK_FIEMAP) + if (flag != F2FS_GET_BLOCK_FIEMAP) { + /* for defragment case */ + if (map->m_next_pgofs) + *map->m_next_pgofs = pgofs + 1; goto sync_out; + } } } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 58c9e3d8b8ec..9553f80dc677 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2071,10 +2071,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, struct f2fs_defragment *range) { struct inode *inode = file_inode(filp); - struct f2fs_map_blocks map = { .m_next_pgofs = NULL, - .m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE }; + struct f2fs_map_blocks map = { .m_next_extent = NULL, + .m_seg_type = NO_CHECK_TYPE }; struct extent_info ei = {0,0,0}; - pgoff_t pg_start, pg_end; + pgoff_t pg_start, pg_end, next_pgofs; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; block_t blk_end = 0; @@ -2108,6 +2108,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } map.m_lblk = pg_start; + map.m_next_pgofs = &next_pgofs; /* * lookup mapping info in dnode page cache, skip defragmenting if all @@ -2121,7 +2122,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, goto out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } @@ -2166,7 +2167,7 @@ do_map: goto clear_out; if (!(map.m_flags & F2FS_MAP_FLAGS)) { - map.m_lblk++; + map.m_lblk = next_pgofs; continue; } From a2e2e76b23038b187e5656c467ec76eeb29b8275 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 15 Jan 2018 17:16:46 +0800 Subject: [PATCH 55/72] f2fs: fix to drop all inmem pages correctly In commit 57864ae5ce3a ("f2fs: limit # of inmemory pages"), we have limited memory footprint of all inmem pages with 20% of total memory, otherwise, if we exceed the threshold, we will try to drop all inmem pages to avoid excessive memory pressure resulting in performance regression. But in some unrelated error paths, we will also drop all inmem pages, which should be wrong, fix it in this patch. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 81fd4823a071..2db26388b6bf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2078,7 +2078,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *page = NULL; pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; - bool need_balance = false; + bool need_balance = false, drop_atomic = false; block_t blkaddr = NULL_ADDR; int err = 0; @@ -2087,6 +2087,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (f2fs_is_atomic_file(inode) && !available_free_memory(sbi, INMEM_PAGES)) { err = -ENOMEM; + drop_atomic = true; goto fail; } @@ -2167,7 +2168,7 @@ repeat: fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); - if (f2fs_is_atomic_file(inode)) + if (drop_atomic) drop_inmem_pages_all(sbi); return err; } From d8a9a22992f97cb3e2a7f6e706ecd510d5783e44 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Jan 2018 16:02:36 -0800 Subject: [PATCH 56/72] f2fs: allow quota to use reserved blocks This patch allows quota to use reserved blocks all the time. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7a85298a74eb..269f9dec3d49 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1576,10 +1576,15 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } -static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi) +static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, + struct inode *inode) { + if (!inode) + return true; if (!test_opt(sbi, RESERVE_ROOT)) return false; + if (IS_NOQUOTA(inode)) + return true; if (capable(CAP_SYS_RESOURCE)) return true; if (uid_eq(sbi->s_resuid, current_fsuid())) @@ -1620,7 +1625,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = sbi->user_block_count - sbi->current_reserved_blocks; - if (!__allow_reserved_blocks(sbi)) + if (!__allow_reserved_blocks(sbi, inode)) avail_user_block_count -= sbi->root_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { @@ -1821,7 +1826,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, valid_block_count = sbi->total_valid_block_count + sbi->current_reserved_blocks + 1; - if (!__allow_reserved_blocks(sbi)) + if (!__allow_reserved_blocks(sbi, inode)) valid_block_count += sbi->root_reserved_blocks; if (unlikely(valid_block_count > sbi->user_block_count)) { From a9d572c7550044d5b217b5287d99a2e6d34b97b0 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 17 Jan 2018 12:11:31 +0800 Subject: [PATCH 57/72] f2fs: avoid hungtask when GC encrypted block if io_bits is set When io_bits is set, GCing encrypted block may hit the following hungtask. Since io_bits requires aligned block address, f2fs_submit_page_write may return -EAGAIN if new_blkaddr does not satisify io_bits alignment. As a result, the encrypted page will never be writtenback. This patch makes move_data_block aware the EAGAIN error and cancel the writeback. [ 246.751371] INFO: task kworker/u4:4:797 blocked for more than 90 seconds. [ 246.752423] Not tainted 4.15.0-rc4+ #11 [ 246.754176] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 246.755336] kworker/u4:4 D25448 797 2 0x80000000 [ 246.755597] Workqueue: writeback wb_workfn (flush-7:0) [ 246.755616] Call Trace: [ 246.755695] ? __schedule+0x322/0xa90 [ 246.755761] ? blk_init_request_from_bio+0x120/0x120 [ 246.755773] ? pci_mmcfg_check_reserved+0xb0/0xb0 [ 246.755801] ? __radix_tree_create+0x19e/0x200 [ 246.755813] ? delete_node+0x136/0x370 [ 246.755838] schedule+0x43/0xc0 [ 246.755904] io_schedule+0x17/0x40 [ 246.755939] wait_on_page_bit_common+0x17b/0x240 [ 246.755950] ? wake_page_function+0xa0/0xa0 [ 246.755961] ? add_to_page_cache_lru+0x160/0x160 [ 246.755972] ? page_cache_tree_insert+0x170/0x170 [ 246.755983] ? __lru_cache_add+0x96/0xb0 [ 246.756086] __filemap_fdatawait_range+0x14f/0x1c0 [ 246.756097] ? wait_on_page_bit_common+0x240/0x240 [ 246.756120] ? __wake_up_locked_key_bookmark+0x20/0x20 [ 246.756167] ? wait_on_all_pages_writeback+0xc9/0x100 [ 246.756179] ? __remove_ino_entry+0x120/0x120 [ 246.756192] ? wait_woken+0x100/0x100 [ 246.756204] filemap_fdatawait_range+0x9/0x20 [ 246.756216] write_checkpoint+0x18a1/0x1f00 [ 246.756254] ? blk_get_request+0x10/0x10 [ 246.756265] ? cpumask_next_and+0x43/0x60 [ 246.756279] ? f2fs_sync_inode_meta+0x160/0x160 [ 246.756289] ? remove_element.isra.4+0xa0/0xa0 [ 246.756300] ? __put_compound_page+0x40/0x40 [ 246.756310] ? f2fs_sync_fs+0xec/0x1c0 [ 246.756320] ? f2fs_sync_fs+0x120/0x1c0 [ 246.756329] f2fs_sync_fs+0x120/0x1c0 [ 246.756357] ? trace_event_raw_event_f2fs__page+0x260/0x260 [ 246.756393] ? ata_build_rw_tf+0x173/0x410 [ 246.756397] f2fs_balance_fs_bg+0x198/0x390 [ 246.756405] ? drop_inmem_page+0x230/0x230 [ 246.756415] ? ahci_qc_prep+0x1bb/0x2e0 [ 246.756418] ? ahci_qc_issue+0x1df/0x290 [ 246.756422] ? __accumulate_pelt_segments+0x42/0xd0 [ 246.756426] ? f2fs_write_node_pages+0xd1/0x380 [ 246.756429] f2fs_write_node_pages+0xd1/0x380 [ 246.756437] ? sync_node_pages+0x8f0/0x8f0 [ 246.756440] ? update_curr+0x53/0x220 [ 246.756444] ? __accumulate_pelt_segments+0xa2/0xd0 [ 246.756448] ? __update_load_avg_se.isra.39+0x349/0x360 [ 246.756452] ? do_writepages+0x2a/0xa0 [ 246.756456] do_writepages+0x2a/0xa0 [ 246.756460] __writeback_single_inode+0x70/0x490 [ 246.756463] ? check_preempt_wakeup+0x199/0x310 [ 246.756467] writeback_sb_inodes+0x2a2/0x660 [ 246.756471] ? is_empty_dir_inode+0x40/0x40 [ 246.756474] ? __writeback_single_inode+0x490/0x490 [ 246.756477] ? string+0xbf/0xf0 [ 246.756480] ? down_read_trylock+0x35/0x60 [ 246.756484] __writeback_inodes_wb+0x9f/0xf0 [ 246.756488] wb_writeback+0x41d/0x4b0 [ 246.756492] ? writeback_inodes_wb.constprop.55+0x150/0x150 [ 246.756498] ? set_worker_desc+0xf7/0x130 [ 246.756502] ? current_is_workqueue_rescuer+0x60/0x60 [ 246.756511] ? _find_next_bit+0x2c/0xa0 [ 246.756514] ? wb_workfn+0x400/0x5d0 [ 246.756518] wb_workfn+0x400/0x5d0 [ 246.756521] ? finish_task_switch+0xdf/0x2a0 [ 246.756525] ? inode_wait_for_writeback+0x30/0x30 [ 246.756529] process_one_work+0x3a7/0x6f0 [ 246.756533] worker_thread+0x82/0x750 [ 246.756537] kthread+0x16f/0x1c0 [ 246.756541] ? trace_event_raw_event_workqueue_work+0x110/0x110 [ 246.756544] ? kthread_create_worker_on_cpu+0xb0/0xb0 [ 246.756548] ret_from_fork+0x1f/0x30 Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 33e79697e41c..aa720cc44509 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -691,7 +691,12 @@ static void move_data_block(struct inode *inode, block_t bidx, fio.op = REQ_OP_WRITE; fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; - f2fs_submit_page_write(&fio); + err = f2fs_submit_page_write(&fio); + if (err) { + if (PageWriteback(fio.encrypted_page)) + end_page_writeback(fio.encrypted_page); + goto put_page_out; + } f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE); From d7997e63684c98d93defac0c99f589778bc71869 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 Jan 2018 16:31:35 +0800 Subject: [PATCH 58/72] f2fs: clean up error path of fill_super This patch cleans up error path of fille_super to avoid unneeded release step. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2bfd09812d2a..8173ae688814 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2604,14 +2604,14 @@ try_onemore: err = init_percpu_info(sbi); if (err) - goto free_options; + goto free_bio_info; if (F2FS_IO_SIZE(sbi) > 1) { sbi->write_io_dummy = mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); if (!sbi->write_io_dummy) { err = -ENOMEM; - goto free_options; + goto free_percpu; } } @@ -2843,10 +2843,12 @@ free_meta_inode: iput(sbi->meta_inode); free_io_dummy: mempool_destroy(sbi->write_io_dummy); -free_options: +free_percpu: + destroy_percpu_info(sbi); +free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) kfree(sbi->write_io[i]); - destroy_percpu_info(sbi); +free_options: #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); From b323fd28bbf95c3181e02e7a642b7d24f3e32714 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 Jan 2018 16:31:36 +0800 Subject: [PATCH 59/72] f2fs: kill F2FS_INLINE_XATTR_ADDRS for cleanup Use get_inline_xattr_addrs directly instead of F2FS_INLINE_XATTR_ADDRS. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2db26388b6bf..2b892a619d71 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1200,7 +1200,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, phys = (__u64)blk_to_logical(inode, ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + sizeof(__le32) * (DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS(inode)); + get_inline_xattr_addrs(inode)); phys += offset; len = inline_xattr_size(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 269f9dec3d49..f2656d0e28f2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -414,10 +414,9 @@ struct f2fs_flush_device { #define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); static inline int get_inline_xattr_addrs(struct inode *inode); -#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode) #define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ (CUR_ADDRS_PER_INODE(inode) - \ - F2FS_INLINE_XATTR_ADDRS(inode) - \ + get_inline_xattr_addrs(inode) - \ DEF_INLINE_RESERVED_SIZE)) /* for inline dir */ @@ -2324,7 +2323,7 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { - return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode); + return CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -2332,7 +2331,7 @@ static inline void *inline_xattr_addr(struct inode *inode, struct page *page) struct f2fs_inode *ri = F2FS_INODE(page); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS(inode)]); + get_inline_xattr_addrs(inode)]); } static inline int inline_xattr_size(struct inode *inode) From eb4497975e82448746bb7de3d13b743b7145aed7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 Jan 2018 16:31:37 +0800 Subject: [PATCH 60/72] f2fs: fix to update last_disk_size correctly This patch fixes to update last_disk_size only when writing out page successfully. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2b892a619d71..fa91bfeb754b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1716,10 +1716,14 @@ write: } } - down_write(&F2FS_I(inode)->i_sem); - if (F2FS_I(inode)->last_disk_size < psize) - F2FS_I(inode)->last_disk_size = psize; - up_write(&F2FS_I(inode)->i_sem); + if (err) { + file_set_keep_isize(inode); + } else { + down_write(&F2FS_I(inode)->i_sem); + if (F2FS_I(inode)->last_disk_size < psize) + F2FS_I(inode)->last_disk_size = psize; + up_write(&F2FS_I(inode)->i_sem); + } done: if (err && err != -ENOENT) From bb9e3bb8dbf5975a493e4e2dfbb900018579890c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 Jan 2018 16:31:38 +0800 Subject: [PATCH 61/72] f2fs: split need_inplace_update This patch splits need_inplace_update to two functions: a. should_update_inplace() includes all conditions that we must use IPU. b. should_update_outplace() includes all conditions that we must use OPU. So that, in f2fs_ioc_set_pin_file() and f2fs_defragment_range(), we can use corresponding function to check whether we can trigger OPU/IPU or not. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 75 ++++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/file.c | 7 ++++- fs/f2fs/segment.h | 41 -------------------------- 4 files changed, 75 insertions(+), 50 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index fa91bfeb754b..f428f7b79c64 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1518,20 +1518,79 @@ retry_encrypt: return PTR_ERR(fio->encrypted_page); } +static inline bool check_inplace_update_policy(struct inode *inode, + struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int policy = SM_I(sbi)->ipu_policy; + + if (policy & (0x1 << F2FS_IPU_FORCE)) + return true; + if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) + return true; + if (policy & (0x1 << F2FS_IPU_UTIL) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + + /* + * IPU for rewrite async pages + */ + if (policy & (0x1 << F2FS_IPU_ASYNC) && + fio && fio->op == REQ_OP_WRITE && + !(fio->op_flags & REQ_SYNC) && + !f2fs_encrypted_inode(inode)) + return true; + + /* this is only set during fdatasync */ + if (policy & (0x1 << F2FS_IPU_FSYNC) && + is_inode_flag_set(inode, FI_NEED_IPU)) + return true; + + return false; +} + +bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) +{ + if (f2fs_is_pinned_file(inode)) + return true; + + /* if this is cold file, we should overwrite to avoid fragmentation */ + if (file_is_cold(inode)) + return true; + + return check_inplace_update_policy(inode, fio); +} + +bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (test_opt(sbi, LFS)) + return true; + if (S_ISDIR(inode->i_mode)) + return true; + if (f2fs_is_atomic_file(inode)) + return true; + if (fio) { + if (is_cold_data(fio->page)) + return true; + if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + return true; + } + return false; +} + static inline bool need_inplace_update(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; - if (f2fs_is_pinned_file(inode)) - return true; - if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) - return false; - if (is_cold_data(fio->page)) - return false; - if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + if (should_update_outplace(inode, fio)) return false; - return need_inplace_update_policy(inode, fio); + return should_update_inplace(inode, fio); } static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f2656d0e28f2..365faebd27f9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2834,6 +2834,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); +bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); +bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); void f2fs_set_page_dirty_nobuffers(struct page *page); int __f2fs_write_data_pages(struct address_space *mapping, struct writeback_control *wbc, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9553f80dc677..f31f0590cdc7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2082,7 +2082,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update_policy(inode, NULL)) + if (should_update_inplace(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -2716,6 +2716,11 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); + if (should_update_outplace(inode, NULL)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); F2FS_I(inode)->i_gc_failures = 1; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 71a2aaa286df..1a807e6e7c97 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -580,47 +580,6 @@ enum { F2FS_IPU_ASYNC, }; -static inline bool need_inplace_update_policy(struct inode *inode, - struct f2fs_io_info *fio) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int policy = SM_I(sbi)->ipu_policy; - - if (test_opt(sbi, LFS)) - return false; - - /* if this is cold file, we should overwrite to avoid fragmentation */ - if (file_is_cold(inode)) - return true; - - if (policy & (0x1 << F2FS_IPU_FORCE)) - return true; - if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) - return true; - if (policy & (0x1 << F2FS_IPU_UTIL) && - utilization(sbi) > SM_I(sbi)->min_ipu_util) - return true; - if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && - utilization(sbi) > SM_I(sbi)->min_ipu_util) - return true; - - /* - * IPU for rewrite async pages - */ - if (policy & (0x1 << F2FS_IPU_ASYNC) && - fio && fio->op == REQ_OP_WRITE && - !(fio->op_flags & REQ_SYNC) && - !f2fs_encrypted_inode(inode)) - return true; - - /* this is only set during fdatasync */ - if (policy & (0x1 << F2FS_IPU_FSYNC) && - is_inode_flag_set(inode, FI_NEED_IPU)) - return true; - - return false; -} - static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, int type) { From d027c48447c2c2fab2b2aa1833436cf24ea567e9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 17 Jan 2018 22:28:52 +0800 Subject: [PATCH 62/72] f2fs: hanlde error case in f2fs_ioc_shutdown This patch makes f2fs_ioc_shutdown handling error case correctly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f31f0590cdc7..e20b01acbd2c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1839,14 +1839,20 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) switch (in) { case F2FS_GOING_DOWN_FULLSYNC: sb = freeze_bdev(sb->s_bdev); - if (sb && !IS_ERR(sb)) { + if (IS_ERR(sb)) { + ret = PTR_ERR(sb); + goto out; + } + if (sb) { f2fs_stop_checkpoint(sbi, false); thaw_bdev(sb->s_bdev, sb); } break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ - f2fs_sync_fs(sb, 1); + ret = f2fs_sync_fs(sb, 1); + if (ret) + goto out; f2fs_stop_checkpoint(sbi, false); break; case F2FS_GOING_DOWN_NOSYNC: From 7950e9ac638e84518fbdd5c930939ad46a1068c5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Jan 2018 17:23:29 +0800 Subject: [PATCH 63/72] f2fs: stop gc/discard thread after fs shutdown Once filesystem shuts down, daemons like gc/discard thread should be aware of it, and do exit, in addtion, drop all cached pending discard commands and turn off real-time discard mode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 7 +++++++ fs/f2fs/segment.c | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 365faebd27f9..b7ba496af28f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2726,6 +2726,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void init_discard_policy(struct discard_policy *dpolicy, int discard_type, unsigned int granularity); +void drop_discard_cmd(struct f2fs_sb_info *sbi); void stop_discard_thread(struct f2fs_sb_info *sbi); bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e20b01acbd2c..84306c718e68 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1866,6 +1866,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) ret = -EINVAL; goto out; } + + stop_gc_thread(sbi); + stop_discard_thread(sbi); + + drop_discard_cmd(sbi); + clear_opt(sbi, DISCARD); + f2fs_update_time(sbi, REQ_TIME); out: mnt_drop_write_file(filp); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e8e054db49d..e5739ce23a72 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1263,6 +1263,11 @@ static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) return dropped; } +void drop_discard_cmd(struct f2fs_sb_info *sbi) +{ + __drop_discard_cmd(sbi); +} + static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { From db198ae0f823e13e3698b24049e741978a0f14e3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Jan 2018 17:29:10 +0800 Subject: [PATCH 64/72] f2fs: drop page cache after fs shutdown Don't remain dirtied page cache in f2fs after shutdown, it can mitigate memory pressure of whole system, in order to keep other modules working properly. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++++-- fs/f2fs/data.c | 12 ++++++------ fs/f2fs/node.c | 19 ++++++++++--------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 097eadc82432..9c7596f7daae 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -237,12 +237,15 @@ static int __f2fs_write_meta_page(struct page *page, trace_f2fs_writepage(page, META); + if (unlikely(f2fs_cp_error(sbi))) { + dec_page_count(sbi, F2FS_DIRTY_META); + unlock_page(page); + return 0; + } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; write_meta_page(sbi, page, io_type); dec_page_count(sbi, F2FS_DIRTY_META); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f428f7b79c64..6cba74eb09a7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1716,6 +1716,12 @@ static int __write_data_page(struct page *page, bool *submitted, trace_f2fs_writepage(page, DATA); + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + mapping_set_error(page->mapping, -EIO); + goto out; + } + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; @@ -1740,12 +1746,6 @@ write: available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; - /* we should bypass data pages to proceed the kworkder jobs */ - if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(page->mapping, -EIO); - goto out; - } - /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { fio.need_lock = LOCK_DONE; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ea29314ca8a3..833b46b9de9f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1340,10 +1340,14 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, trace_f2fs_writepage(page, NODE); + if (unlikely(f2fs_cp_error(sbi))) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); + return 0; + } + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; /* get old block addr of this node page */ nid = nid_of_node(page); @@ -1580,12 +1584,6 @@ next_step: struct page *page = pvec.pages[i]; bool submitted = false; - if (unlikely(f2fs_cp_error(sbi))) { - pagevec_release(&pvec); - ret = -EIO; - goto out; - } - /* * flushing sequence with step: * 0. indirect nodes @@ -1655,9 +1653,12 @@ continue_unlock: step++; goto next_step; } -out: + if (nwritten) f2fs_submit_merged_write(sbi, NODE); + + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; return ret; } From b2c4692bc25c095ca2317cc5a8619597a892e7af Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sat, 20 Jan 2018 15:46:33 +0800 Subject: [PATCH 65/72] f2fs: correct removexattr behavior for null valued extended attribute __vfs_removexattr() transfers "NULL" value to the setxattr handler of the f2fs filesystem in order to remove the extended attribute. But, __f2fs_setxattr() just ignores the removal request when the value of the extended attribute is already NULL. We have to remove the extended attribute itself even if the value of that is already NULL. We can reporduce this bug with the below: 1. touch file 2. setfattr -n "user.foo" file 3. setfattr -x "user.foo" file 4. getfattr -d file > user.foo Signed-off-by: Daeho Jeong Signed-off-by: Youngjin Gil Tested-by: Hobin Woo Tested-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 600162f4ddbf..ae2dfa709f5d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -600,7 +600,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, goto exit; } - if (f2fs_xattr_value_same(here, value, size)) + if (value && f2fs_xattr_value_same(here, value, size)) goto exit; } else if ((flags & XATTR_REPLACE)) { error = -ENODATA; From 37a086f015245a7d81e9e577389872b98aa0719a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 19 Jan 2018 20:01:40 -0800 Subject: [PATCH 66/72] f2fs: recover some i_inline flags This fixes lost i_inline flags during roll-forward. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index cbeef73bc4dd..337f3363f48f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -195,6 +195,20 @@ out: return err; } +static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) +{ + if (ri->i_inline & F2FS_PIN_FILE) + set_inode_flag(inode, FI_PIN_FILE); + else + clear_inode_flag(inode, FI_PIN_FILE); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(inode, FI_DATA_EXIST); + else + clear_inode_flag(inode, FI_DATA_EXIST); + if (!(ri->i_inline & F2FS_INLINE_DOTS)) + clear_inode_flag(inode, FI_INLINE_DOTS); +} + static void recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); @@ -211,13 +225,16 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; + recover_inline_flags(inode, raw); + if (file_enc_name(inode)) name = ""; else name = F2FS_INODE(page)->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", - ino_of_node(page), name); + f2fs_msg(inode->i_sb, KERN_NOTICE, + "recover_inode: ino = %x, name = %s, inline = %x", + ino_of_node(page), name, raw->i_inline); } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, From f236792311f4575a1ca47240d3a74034096ef9e8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 19 Jan 2018 13:42:33 -0800 Subject: [PATCH 67/72] f2fs: allow to recover node blocks given updated checkpoint If fsck.f2fs changes crc, we have no way to recover some inode blocks by roll- forward recovery. Let's relax the condition to recover them. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/node.h | 4 ++++ include/linux/f2fs_fs.h | 1 + 3 files changed, 6 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9c7596f7daae..512dca8abc7d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1157,6 +1157,7 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); + __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); spin_unlock_irqrestore(&sbi->cp_lock, flags); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0ee3e5ff49a3..081ef0d672bf 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -305,6 +305,10 @@ static inline bool is_recoverable_dnode(struct page *page) struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); __u64 cp_ver = cur_cp_version(ckpt); + /* Don't care crc part, if fsck.f2fs sets it. */ + if (__is_set_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG)) + return (cp_ver << 32) == (cpver_of_node(page) << 32); + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) cp_ver |= (cur_cp_crc(ckpt) << 32); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index cfb522e6affc..6eed677b6d9a 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -117,6 +117,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 #define CP_CRC_RECOVERY_FLAG 0x00000040 From 2882d34310a9b87428d723ba9bb040d7ef0b5ba8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 18:57:25 +0800 Subject: [PATCH 68/72] f2fs: use GFP_F2FS_ZERO for cleanup Clean up codes with GFP_F2FS_ZERO, no logic changes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 833b46b9de9f..177c438e4a56 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -143,11 +143,9 @@ static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail) struct nat_entry *new; if (no_fail) - new = f2fs_kmem_cache_alloc(nat_entry_slab, - GFP_NOFS | __GFP_ZERO); + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); else - new = kmem_cache_alloc(nat_entry_slab, - GFP_NOFS | __GFP_ZERO); + new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO); if (new) { nat_set_nid(new, nid); nat_reset_flag(new); From 6819b884e0eea641a2678be01a41a071efcfd489 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 18:57:26 +0800 Subject: [PATCH 69/72] f2fs: clean up duplicated assignment in init_discard_policy Remove duplicated codes of assignment for .max_requests and .io_aware_gran. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e5739ce23a72..9c09e2c96705 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1715,25 +1715,20 @@ void init_discard_policy(struct discard_policy *dpolicy, dpolicy->sync = true; dpolicy->granularity = granularity; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + if (discard_type == DPOLICY_BG) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = true; } else if (discard_type == DPOLICY_FORCE) { dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = true; } else if (discard_type == DPOLICY_FSTRIM) { - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->io_aware = false; } } From 3b60d802d9828f99db83c3ca4fd99a5fa3fce884 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 18:57:27 +0800 Subject: [PATCH 70/72] f2fs: stop issuing discard if fs is readonly If filesystem is readonly, stop to issue discard in daemon. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9c09e2c96705..ca1f567f701a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1406,6 +1406,8 @@ static int issue_discard_thread(void *data) msecs_to_jiffies(wait_ms)); if (try_to_freeze()) continue; + if (f2fs_readonly(sbi->sb)) + continue; if (kthread_should_stop()) return 0; From 068c3cd85866785303f18960a4c00a182515b963 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 25 Jan 2018 17:27:11 +0800 Subject: [PATCH 71/72] f2fs: rebuild sit page from sit info in mem This patch rebuild sit page from sit info in mem instead of issue a read io. I test this method and the result is as below: Pre: mmc_perf_test-12061 [001] ...1 976.819992: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [001] ...1 976.856446: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 998.976946: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 999.023269: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1022.060772: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1022.111034: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [002] ...1 1070.127643: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1070.187352: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1095.942124: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1095.995975: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1122.535091: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [003] ...1 1122.586521: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [001] ...1 1147.897487: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [001] ...1 1147.959438: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [003] ...1 1177.926951: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [002] ...1 1177.976823: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-12061 [002] ...1 1204.176087: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-12061 [002] ...1 1204.239046: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit Some sit flush consume more than 50ms. Now: mmc_perf_test-2187 [007] ...1 196.840684: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [007] ...1 196.841258: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [007] ...1 219.430582: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [007] ...1 219.431144: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [002] ...1 243.638678: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [000] ...1 243.638980: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [002] ...1 265.392180: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [002] ...1 265.392245: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [000] ...1 290.309051: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [000] ...1 290.309116: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [003] ...1 317.144209: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [003] ...1 317.145913: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [005] ...1 343.224954: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [005] ...1 343.225574: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [000] ...1 370.239846: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [000] ...1 370.241138: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [001] ...1 397.029043: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [001] ...1 397.030750: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit mmc_perf_test-2187 [003] ...1 425.386377: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = start flush sit mmc_perf_test-2187 [003] ...1 425.387735: f2fs_write_checkpoint: dev = (259,44), checkpoint for Sync, state = end flush sit Most sit flush consume no more than 1ms. Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 19 +++++-------------- fs/f2fs/segment.h | 29 +++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ca1f567f701a..b16a8e6625aa 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3110,28 +3110,19 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); - struct page *src_page, *dst_page; + struct page *page; pgoff_t src_off, dst_off; - void *src_addr, *dst_addr; src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); - /* get current sit block page without lock */ - src_page = get_meta_page(sbi, src_off); - dst_page = grab_meta_page(sbi, dst_off); - f2fs_bug_on(sbi, PageDirty(src_page)); - - src_addr = page_address(src_page); - dst_addr = page_address(dst_page); - memcpy(dst_addr, src_addr, PAGE_SIZE); - - set_page_dirty(dst_page); - f2fs_put_page(src_page, 1); + page = grab_meta_page(sbi, dst_off); + seg_info_to_sit_page(sbi, page, start); + set_page_dirty(page); set_to_next_sit(sit_i, start); - return dst_page; + return page; } static struct sit_entry_set *grab_sit_entry_set(void) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 1a807e6e7c97..f11c4bc82c78 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -348,16 +348,41 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se, se->mtime = le64_to_cpu(rs->mtime); } -static inline void seg_info_to_raw_sit(struct seg_entry *se, +static inline void __seg_info_to_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | se->valid_blocks; rs->vblocks = cpu_to_le16(raw_vblocks); memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi, + struct page *page, unsigned int start) +{ + struct f2fs_sit_block *raw_sit; + struct seg_entry *se; + struct f2fs_sit_entry *rs; + unsigned int end = min(start + SIT_ENTRY_PER_BLOCK, + (unsigned long)MAIN_SEGS(sbi)); + int i; + + raw_sit = (struct f2fs_sit_block *)page_address(page); + for (i = 0; i < end - start; i++) { + rs = &raw_sit->entries[i]; + se = get_seg_entry(sbi, start + i); + __seg_info_to_raw_sit(se, rs); + } +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + __seg_info_to_raw_sit(se, rs); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); se->ckpt_valid_blocks = se->valid_blocks; - rs->mtime = cpu_to_le64(se->mtime); } static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, From 1c1d35df71104c76a4a2e25862926f22c334c9d6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 25 Jan 2018 14:54:42 +0800 Subject: [PATCH 72/72] f2fs: support inode creation time This patch adds creation time field in inode layout to support showing kstat.btime in ->statx. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/file.c | 9 +++++++++ fs/f2fs/inode.c | 15 +++++++++++++++ fs/f2fs/namei.c | 3 ++- fs/f2fs/sysfs.c | 7 +++++++ include/linux/f2fs_fs.h | 4 +++- 6 files changed, 43 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b7ba496af28f..6300ac5bcbe4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -124,6 +124,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_INODE_CHKSUM 0x0020 #define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 #define F2FS_FEATURE_QUOTA_INO 0x0080 +#define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -635,6 +636,7 @@ struct f2fs_inode_info { int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ int i_inline_xattr_size; /* inline xattr size */ + struct timespec i_crtime; /* inode creation time */ }; static inline void get_extent_info(struct extent_info *ext, @@ -3205,6 +3207,11 @@ static inline int f2fs_sb_has_quota_ino(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO); } +static inline int f2fs_sb_has_inode_crtime(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CRTIME); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 84306c718e68..672a542e5464 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -672,8 +672,17 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode *ri; unsigned int flags; + if (f2fs_has_extra_attr(inode) && + f2fs_sb_has_inode_crtime(inode->i_sb) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = fi->i_crtime.tv_sec; + stat->btime.tv_nsec = fi->i_crtime.tv_nsec; + } + flags = fi->i_flags & (FS_FL_USER_VISIBLE | FS_PROJINHERIT_FL); if (flags & FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1dc77a40d0ad..89c838bfb067 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -278,6 +278,12 @@ static int do_read_inode(struct inode *inode) i_projid = F2FS_DEF_PROJID; fi->i_projid = make_kprojid(&init_user_ns, i_projid); + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_inode_crtime(sbi->sb) && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) { + fi->i_crtime.tv_sec = le64_to_cpu(ri->i_crtime); + fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); + } + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); @@ -421,6 +427,15 @@ void update_inode(struct inode *inode, struct page *node_page) F2FS_I(inode)->i_projid); ri->i_projid = cpu_to_le32(i_projid); } + + if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)->sb) && + F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + i_crtime)) { + ri->i_crtime = + cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec); + ri->i_crtime_nsec = + cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); + } } __set_inode_rdev(inode, ri); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 3ee97ba9d2d7..c4c94c7e9f4f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -50,7 +50,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) inode->i_ino = ino; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_mtime = inode->i_atime = inode->i_ctime = + F2FS_I(inode)->i_crtime = current_time(inode); inode->i_generation = sbi->s_next_generation++; err = insert_inode_locked(inode); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 41887e6ec1b3..d978c7b6ea04 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -113,6 +113,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_quota_ino(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "quota_ino"); + if (f2fs_sb_has_inode_crtime(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "inode_crtime"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -232,6 +235,7 @@ enum feat_id { FEAT_INODE_CHECKSUM, FEAT_FLEXIBLE_INLINE_XATTR, FEAT_QUOTA_INO, + FEAT_INODE_CRTIME, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -246,6 +250,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_INODE_CHECKSUM: case FEAT_FLEXIBLE_INLINE_XATTR: case FEAT_QUOTA_INO: + case FEAT_INODE_CRTIME: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -323,6 +328,7 @@ F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); +F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -376,6 +382,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(inode_checksum), ATTR_LIST(flexible_inline_xattr), ATTR_LIST(quota_ino), + ATTR_LIST(inode_crtime), NULL, }; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 6eed677b6d9a..58aecb60ea51 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -253,8 +253,10 @@ struct f2fs_inode { __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */ __le32 i_projid; /* project id */ __le32 i_inode_checksum;/* inode meta checksum */ + __le64 i_crtime; /* creation time */ + __le32 i_crtime_nsec; /* creation time in nano scale */ __le32 i_extra_end[0]; /* for attribute size calculation */ - }; + } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ }; __le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2),