ocfs2: ocfs2_inode_lock_tracker does not distinguish lock level
ocfs2_inode_lock_tracker as a variant of ocfs2_inode_lock, is used to prevent deadlock due to recursive lock acquisition. But this function does not distinguish whether the requested level is EX or PR. If a RP lock has been attained, this function will immediately return success afterwards even an EX lock is requested. But actually the return value does not mean that the process got a EX lock, because ocfs2_inode_lock has not been called. When taking lock levels into account, we face some different situations: 1. no lock is held In this case, just lock the inode and return 0 2. We are holding a lock For this situation, things diverges into several cases wanted holding what to do ex ex see 2.1 below ex pr see 2.2 below pr ex see 2.1 below pr pr see 2.1 below 2.1 lock level that is been held is compatible with the wanted level, so no lock action will be tacken. 2.2 Otherwise, an upgrade is needed, but it is forbidden. Reason why upgrade within a process is forbidden is that lock upgrade may cause dead lock. The following illustrate how it happens. process 1 process 2 ocfs2_inode_lock_tracker(ex=0) <====== ocfs2_inode_lock_tracker(ex=1) ocfs2_inode_lock_tracker(ex=1) For the status quo of ocfs2, without this patch, neither a bug nor end-user impact will be caused because the wrong logic is avoided. But I'm afraid this generic interface, may be called by other developers in future and used in this situation. a process ocfs2_inode_lock_tracker(ex=0) ocfs2_inode_lock_tracker(ex=1) Link: http://lkml.kernel.org/r/20180510053230.17217-1-lchen@suse.com Signed-off-by: Larry Chen <lchen@suse.com> Reviewed-by: Gang He <ghe@suse.com> Cc: Mark Fasheh <mark@fasheh.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <jiangqi903@gmail.com> Cc: Changwei Ge <ge.changwei@h3c.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
5bc55d654b
commit
133b81f28e
|
@ -788,6 +788,23 @@ static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
|
||||||
spin_unlock(&lockres->l_lock);
|
spin_unlock(&lockres->l_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct ocfs2_lock_holder *
|
||||||
|
ocfs2_pid_holder(struct ocfs2_lock_res *lockres,
|
||||||
|
struct pid *pid)
|
||||||
|
{
|
||||||
|
struct ocfs2_lock_holder *oh;
|
||||||
|
|
||||||
|
spin_lock(&lockres->l_lock);
|
||||||
|
list_for_each_entry(oh, &lockres->l_holders, oh_list) {
|
||||||
|
if (oh->oh_owner_pid == pid) {
|
||||||
|
spin_unlock(&lockres->l_lock);
|
||||||
|
return oh;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spin_unlock(&lockres->l_lock);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
|
static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
|
||||||
struct ocfs2_lock_holder *oh)
|
struct ocfs2_lock_holder *oh)
|
||||||
{
|
{
|
||||||
|
@ -798,24 +815,6 @@ static inline void ocfs2_remove_holder(struct ocfs2_lock_res *lockres,
|
||||||
put_pid(oh->oh_owner_pid);
|
put_pid(oh->oh_owner_pid);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int ocfs2_is_locked_by_me(struct ocfs2_lock_res *lockres)
|
|
||||||
{
|
|
||||||
struct ocfs2_lock_holder *oh;
|
|
||||||
struct pid *pid;
|
|
||||||
|
|
||||||
/* look in the list of holders for one with the current task as owner */
|
|
||||||
spin_lock(&lockres->l_lock);
|
|
||||||
pid = task_pid(current);
|
|
||||||
list_for_each_entry(oh, &lockres->l_holders, oh_list) {
|
|
||||||
if (oh->oh_owner_pid == pid) {
|
|
||||||
spin_unlock(&lockres->l_lock);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
spin_unlock(&lockres->l_lock);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
|
static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
|
||||||
int level)
|
int level)
|
||||||
|
@ -2610,34 +2609,93 @@ void ocfs2_inode_unlock(struct inode *inode,
|
||||||
*
|
*
|
||||||
* return < 0 on error, return == 0 if there's no lock holder on the stack
|
* return < 0 on error, return == 0 if there's no lock holder on the stack
|
||||||
* before this call, return == 1 if this call would be a recursive locking.
|
* before this call, return == 1 if this call would be a recursive locking.
|
||||||
|
* return == -1 if this lock attempt will cause an upgrade which is forbidden.
|
||||||
|
*
|
||||||
|
* When taking lock levels into account,we face some different situations.
|
||||||
|
*
|
||||||
|
* 1. no lock is held
|
||||||
|
* In this case, just lock the inode as requested and return 0
|
||||||
|
*
|
||||||
|
* 2. We are holding a lock
|
||||||
|
* For this situation, things diverges into several cases
|
||||||
|
*
|
||||||
|
* wanted holding what to do
|
||||||
|
* ex ex see 2.1 below
|
||||||
|
* ex pr see 2.2 below
|
||||||
|
* pr ex see 2.1 below
|
||||||
|
* pr pr see 2.1 below
|
||||||
|
*
|
||||||
|
* 2.1 lock level that is been held is compatible
|
||||||
|
* with the wanted level, so no lock action will be tacken.
|
||||||
|
*
|
||||||
|
* 2.2 Otherwise, an upgrade is needed, but it is forbidden.
|
||||||
|
*
|
||||||
|
* Reason why upgrade within a process is forbidden is that
|
||||||
|
* lock upgrade may cause dead lock. The following illustrates
|
||||||
|
* how it happens.
|
||||||
|
*
|
||||||
|
* thread on node1 thread on node2
|
||||||
|
* ocfs2_inode_lock_tracker(ex=0)
|
||||||
|
*
|
||||||
|
* <====== ocfs2_inode_lock_tracker(ex=1)
|
||||||
|
*
|
||||||
|
* ocfs2_inode_lock_tracker(ex=1)
|
||||||
*/
|
*/
|
||||||
int ocfs2_inode_lock_tracker(struct inode *inode,
|
int ocfs2_inode_lock_tracker(struct inode *inode,
|
||||||
struct buffer_head **ret_bh,
|
struct buffer_head **ret_bh,
|
||||||
int ex,
|
int ex,
|
||||||
struct ocfs2_lock_holder *oh)
|
struct ocfs2_lock_holder *oh)
|
||||||
{
|
{
|
||||||
int status;
|
int status = 0;
|
||||||
int arg_flags = 0, has_locked;
|
|
||||||
struct ocfs2_lock_res *lockres;
|
struct ocfs2_lock_res *lockres;
|
||||||
|
struct ocfs2_lock_holder *tmp_oh;
|
||||||
|
struct pid *pid = task_pid(current);
|
||||||
|
|
||||||
|
|
||||||
lockres = &OCFS2_I(inode)->ip_inode_lockres;
|
lockres = &OCFS2_I(inode)->ip_inode_lockres;
|
||||||
has_locked = ocfs2_is_locked_by_me(lockres);
|
tmp_oh = ocfs2_pid_holder(lockres, pid);
|
||||||
/* Just get buffer head if the cluster lock has been taken */
|
|
||||||
if (has_locked)
|
|
||||||
arg_flags = OCFS2_META_LOCK_GETBH;
|
|
||||||
|
|
||||||
if (likely(!has_locked || ret_bh)) {
|
if (!tmp_oh) {
|
||||||
status = ocfs2_inode_lock_full(inode, ret_bh, ex, arg_flags);
|
/*
|
||||||
|
* This corresponds to the case 1.
|
||||||
|
* We haven't got any lock before.
|
||||||
|
*/
|
||||||
|
status = ocfs2_inode_lock_full(inode, ret_bh, ex, 0);
|
||||||
|
if (status < 0) {
|
||||||
|
if (status != -ENOENT)
|
||||||
|
mlog_errno(status);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
oh->oh_ex = ex;
|
||||||
|
ocfs2_add_holder(lockres, oh);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(ex && !tmp_oh->oh_ex)) {
|
||||||
|
/*
|
||||||
|
* case 2.2 upgrade may cause dead lock, forbid it.
|
||||||
|
*/
|
||||||
|
mlog(ML_ERROR, "Recursive locking is not permitted to "
|
||||||
|
"upgrade to EX level from PR level.\n");
|
||||||
|
dump_stack();
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* case 2.1 OCFS2_META_LOCK_GETBH flag make ocfs2_inode_lock_full.
|
||||||
|
* ignore the lock level and just update it.
|
||||||
|
*/
|
||||||
|
if (ret_bh) {
|
||||||
|
status = ocfs2_inode_lock_full(inode, ret_bh, ex,
|
||||||
|
OCFS2_META_LOCK_GETBH);
|
||||||
if (status < 0) {
|
if (status < 0) {
|
||||||
if (status != -ENOENT)
|
if (status != -ENOENT)
|
||||||
mlog_errno(status);
|
mlog_errno(status);
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!has_locked)
|
return tmp_oh ? 1 : 0;
|
||||||
ocfs2_add_holder(lockres, oh);
|
|
||||||
|
|
||||||
return has_locked;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ocfs2_inode_unlock_tracker(struct inode *inode,
|
void ocfs2_inode_unlock_tracker(struct inode *inode,
|
||||||
|
@ -2649,12 +2707,13 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
|
||||||
|
|
||||||
lockres = &OCFS2_I(inode)->ip_inode_lockres;
|
lockres = &OCFS2_I(inode)->ip_inode_lockres;
|
||||||
/* had_lock means that the currect process already takes the cluster
|
/* had_lock means that the currect process already takes the cluster
|
||||||
* lock previously. If had_lock is 1, we have nothing to do here, and
|
* lock previously.
|
||||||
* it will get unlocked where we got the lock.
|
* If had_lock is 1, we have nothing to do here.
|
||||||
|
* If had_lock is 0, we will release the lock.
|
||||||
*/
|
*/
|
||||||
if (!had_lock) {
|
if (!had_lock) {
|
||||||
|
ocfs2_inode_unlock(inode, oh->oh_ex);
|
||||||
ocfs2_remove_holder(lockres, oh);
|
ocfs2_remove_holder(lockres, oh);
|
||||||
ocfs2_inode_unlock(inode, ex);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -96,6 +96,7 @@ struct ocfs2_trim_fs_info {
|
||||||
struct ocfs2_lock_holder {
|
struct ocfs2_lock_holder {
|
||||||
struct list_head oh_list;
|
struct list_head oh_list;
|
||||||
struct pid *oh_owner_pid;
|
struct pid *oh_owner_pid;
|
||||||
|
int oh_ex;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* ocfs2_inode_lock_full() 'arg_flags' flags */
|
/* ocfs2_inode_lock_full() 'arg_flags' flags */
|
||||||
|
|
Loading…
Reference in New Issue