diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 89825261d474..7b4a0fd59512 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1302,7 +1302,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 99e9a5c37b71..7706049d23bf 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -27,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); int i; + /* these will be changed if online resize is done */ + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + /* validation check of the segment numbers */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_cached = atomic64_read(&sbi->read_hit_cached); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9777e306390c..5c983268a303 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -415,6 +415,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) +#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -1116,6 +1117,7 @@ enum { SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ + SBI_IS_RESIZEFS, /* resizefs is in process */ }; enum { @@ -1236,6 +1238,7 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ + struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ @@ -3058,6 +3061,8 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3196,6 +3201,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); /* * recovery.c diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index efdafa886510..74600c4205da 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3059,6 +3059,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) return f2fs_precache_extents(file_inode(filp)); } +static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); + __u64 block_count; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&block_count, (void __user *)arg, + sizeof(block_count))) + return -EFAULT; + + ret = f2fs_resize_fs(sbi, block_count); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3115,6 +3136,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_set_pin_file(filp, arg); case F2FS_IOC_PRECACHE_EXTENTS: return f2fs_ioc_precache_extents(filp, arg); + case F2FS_IOC_RESIZE_FS: + return f2fs_ioc_resize_fs(filp, arg); default: return -ENOTTY; } @@ -3228,6 +3251,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: + case F2FS_IOC_RESIZE_FS: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1e029da26053..e19b49b02d1b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -311,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; - unsigned int last_segment = MAIN_SEGS(sbi); + unsigned int last_segment; unsigned int nsearched = 0; mutex_lock(&dirty_i->seglist_lock); + last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); @@ -405,7 +406,8 @@ next: sm->last_victim[p.gc_mode] = last_victim + 1; else sm->last_victim[p.gc_mode] = segno + 1; - sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); + sm->last_victim[p.gc_mode] %= + (MAIN_SECS(sbi) * sbi->segs_per_sec); break; } } @@ -1361,3 +1363,179 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } + +static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, + unsigned int end) +{ + int type; + unsigned int segno, next_inuse; + int err = 0; + + /* Move out cursegs from the target range */ + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) + allocate_segment_for_resize(sbi, type, start, end); + + /* do GC to move out valid blocks in the range */ + for (segno = start; segno <= end; segno += sbi->segs_per_sec) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + + mutex_lock(&sbi->gc_mutex); + do_garbage_collect(sbi, segno, &gc_list, FG_GC); + mutex_unlock(&sbi->gc_mutex); + put_gc_inode(&gc_list); + + if (get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + } + + err = f2fs_sync_fs(sbi->sb, 1); + if (err) + return err; + + next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); + if (next_inuse <= end) { + f2fs_msg(sbi->sb, KERN_ERR, + "segno %u should be free but still inuse!", next_inuse); + f2fs_bug_on(sbi, 1); + } + return err; +} + +static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) +{ + struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); + int section_count = le32_to_cpu(raw_sb->section_count); + int segment_count = le32_to_cpu(raw_sb->segment_count); + int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + long long block_count = le64_to_cpu(raw_sb->block_count); + int segs = secs * sbi->segs_per_sec; + + raw_sb->section_count = cpu_to_le32(section_count + secs); + raw_sb->segment_count = cpu_to_le32(segment_count + segs); + raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); + raw_sb->block_count = cpu_to_le64(block_count + + (long long)segs * sbi->blocks_per_seg); +} + +static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) +{ + int segs = secs * sbi->segs_per_sec; + long long user_block_count = + le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); + + SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; + MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; + FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + + (long long)segs * sbi->blocks_per_seg); +} + +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) +{ + __u64 old_block_count, shrunk_blocks; + unsigned int secs; + int gc_mode, gc_type; + int err = 0; + __u32 rem; + + old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count); + if (block_count > old_block_count) + return -EINVAL; + + /* new fs size should align to section size */ + div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); + if (rem) + return -EINVAL; + + if (block_count == old_block_count) + return 0; + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Should run fsck to repair first."); + return -EINVAL; + } + + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Checkpoint should be enabled."); + return -EINVAL; + } + + freeze_bdev(sbi->sb->s_bdev); + + shrunk_blocks = old_block_count - block_count; + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + spin_lock(&sbi->stat_lock); + if (shrunk_blocks + valid_user_blocks(sbi) + + sbi->current_reserved_blocks + sbi->unusable_block_count + + F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) + err = -ENOSPC; + else + sbi->user_block_count -= shrunk_blocks; + spin_unlock(&sbi->stat_lock); + if (err) { + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; + } + + mutex_lock(&sbi->resize_mutex); + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + + MAIN_SECS(sbi) -= secs; + + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); + + err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, + MAIN_SEGS(sbi) - 1); + if (err) + goto out; + + update_sb_metadata(sbi, -secs); + + err = f2fs_commit_super(sbi, false); + if (err) { + update_sb_metadata(sbi, secs); + goto out; + } + + update_fs_metadata(sbi, -secs); + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + err = f2fs_sync_fs(sbi->sb, 1); + if (err) { + update_fs_metadata(sbi, secs); + update_sb_metadata(sbi, secs); + f2fs_commit_super(sbi, false); + } +out: + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_ERR, + "resize_fs failed, should run fsck to repair!"); + + MAIN_SECS(sbi) += secs; + spin_lock(&sbi->stat_lock); + sbi->user_block_count += shrunk_blocks; + spin_unlock(&sbi->stat_lock); + } + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + mutex_unlock(&sbi->resize_mutex); + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; +} diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 54a3e398d1ea..198b2e68a487 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2657,6 +2657,40 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno; + + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); + down_write(&SIT_I(sbi)->sentry_lock); + + segno = CURSEG_I(sbi, type)->segno; + if (segno < start || segno > end) + goto unlock; + + if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type); + else + new_curseg(sbi, type, true); + + stat_inc_seg_type(sbi, curseg); + + locate_dirty_segment(sbi, segno); +unlock: + up_write(&SIT_I(sbi)->sentry_lock); + + if (segno != curseg->segno) + f2fs_msg(sbi->sb, KERN_NOTICE, + "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); + + mutex_unlock(&curseg->curseg_mutex); + up_read(&SM_I(sbi)->curseg_lock); +} + void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; @@ -3786,7 +3820,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; - bool to_journal = true; + bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); struct seg_entry *se; down_write(&sit_i->sentry_lock); @@ -3805,7 +3839,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. */ - if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || + !to_journal) remove_sits_in_journal(sbi); /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 15d7e30bfc72..049a5957532e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3218,6 +3218,7 @@ try_onemore: mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); + mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -3290,6 +3291,9 @@ try_onemore: sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) {