ocfs2: add trimfs lock to avoid duplicated trims in cluster
ocfs2 supports trimming the underlying disk via the fstrim command. However, ocfs2 is a shared-disk cluster file system, so if the user configures a scheduled fstrim job on every node, multiple nodes will trim the same shared disk simultaneously. This wastes CPU and IO, and might also shorten the lifetime of poor-quality SSD devices. To handle this case, we introduce a trimfs dlm lock that the nodes use to communicate with each other, so that only one fstrim command actually trims the shared disk across the cluster. The fstrim commands on the other nodes wait for the first fstrim to finish and then return success directly, to avoid running the same trim on the shared disk again. Link: http://lkml.kernel.org/r/1513228484-2084-2-git-send-email-ghe@suse.com Signed-off-by: Gang He <ghe@suse.com> Reviewed-by: Changwei Ge <ge.changwei@h3c.com> Cc: Mark Fasheh <mfasheh@versity.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <jiangqi903@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
4882abebcc
commit
637dd20c49
|
@ -7561,6 +7561,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
|||
struct buffer_head *gd_bh = NULL;
|
||||
struct ocfs2_dinode *main_bm;
|
||||
struct ocfs2_group_desc *gd = NULL;
|
||||
struct ocfs2_trim_fs_info info, *pinfo = NULL;
|
||||
|
||||
start = range->start >> osb->s_clustersize_bits;
|
||||
len = range->len >> osb->s_clustersize_bits;
|
||||
|
@ -7598,6 +7599,42 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
|||
|
||||
trace_ocfs2_trim_fs(start, len, minlen);
|
||||
|
||||
ocfs2_trim_fs_lock_res_init(osb);
|
||||
ret = ocfs2_trim_fs_lock(osb, NULL, 1);
|
||||
if (ret < 0) {
|
||||
if (ret != -EAGAIN) {
|
||||
mlog_errno(ret);
|
||||
ocfs2_trim_fs_lock_res_uninit(osb);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
mlog(ML_NOTICE, "Wait for trim on device (%s) to "
|
||||
"finish, which is running from another node.\n",
|
||||
osb->dev_str);
|
||||
ret = ocfs2_trim_fs_lock(osb, &info, 0);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
ocfs2_trim_fs_lock_res_uninit(osb);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (info.tf_valid && info.tf_success &&
|
||||
info.tf_start == start && info.tf_len == len &&
|
||||
info.tf_minlen == minlen) {
|
||||
/* Avoid sending duplicated trim to a shared device */
|
||||
mlog(ML_NOTICE, "The same trim on device (%s) was "
|
||||
"just done from node (%u), return.\n",
|
||||
osb->dev_str, info.tf_nodenum);
|
||||
range->len = info.tf_trimlen;
|
||||
goto out_trimunlock;
|
||||
}
|
||||
}
|
||||
|
||||
info.tf_nodenum = osb->node_num;
|
||||
info.tf_start = start;
|
||||
info.tf_len = len;
|
||||
info.tf_minlen = minlen;
|
||||
|
||||
/* Determine first and last group to examine based on start and len */
|
||||
first_group = ocfs2_which_cluster_group(main_bm_inode, start);
|
||||
if (first_group == osb->first_cluster_group_blkno)
|
||||
|
@ -7642,6 +7679,13 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
|||
group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
|
||||
}
|
||||
range->len = trimmed * sb->s_blocksize;
|
||||
|
||||
info.tf_trimlen = range->len;
|
||||
info.tf_success = (ret ? 0 : 1);
|
||||
pinfo = &info;
|
||||
out_trimunlock:
|
||||
ocfs2_trim_fs_unlock(osb, pinfo);
|
||||
ocfs2_trim_fs_lock_res_uninit(osb);
|
||||
out_unlock:
|
||||
ocfs2_inode_unlock(main_bm_inode, 0);
|
||||
brelse(main_bm_bh);
|
||||
|
|
Loading…
Reference in New Issue