workqueue: fix possible stall on try_to_grab_pending() of a delayed work item
Currently, when try_to_grab_pending() grabs a delayed work item, it leaves its linked work items alone on the delayed_works list. The linked work items are always NO_COLOR and will cause a future cwq_activate_first_delayed() call to increase cwq->nr_active incorrectly, which may cause the whole cwq to stall. For example, initial state: cwq->max_active = 1, cwq->nr_active = 1, with one work item in cwq->pool and many in cwq->delayed_works. Step 1: try_to_grab_pending() removes a work item from delayed_works but leaves its NO_COLOR linked work items on it. Step 2: Later on, cwq_activate_first_delayed() activates the linked work item, increasing ->nr_active. Step 3: cwq->nr_active = 1, but all activated work items of the cwq are NO_COLOR. When they finish, cwq->nr_active will not be decreased due to NO_COLOR, and no further work items will be activated from cwq->delayed_works. The cwq stalls. Fix it by ensuring the target work item is activated before stealing PENDING in try_to_grab_pending(). This ensures that all the linked work items are activated without incorrectly bumping cwq->nr_active. tj: Updated comment and description. Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: Tejun Heo <tj@kernel.org> Cc: stable@kernel.org
This commit is contained in:
parent
a5b4e57d7c
commit
3aa6249759
|
@ -977,10 +977,9 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
|
|||
*nextp = n;
|
||||
}
|
||||
|
||||
static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
|
||||
static void cwq_activate_delayed_work(struct work_struct *work)
|
||||
{
|
||||
struct work_struct *work = list_first_entry(&cwq->delayed_works,
|
||||
struct work_struct, entry);
|
||||
struct cpu_workqueue_struct *cwq = get_work_cwq(work);
|
||||
|
||||
trace_workqueue_activate_work(work);
|
||||
move_linked_works(work, &cwq->pool->worklist, NULL);
|
||||
|
@ -988,6 +987,14 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
|
|||
cwq->nr_active++;
|
||||
}
|
||||
|
||||
static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
|
||||
{
|
||||
struct work_struct *work = list_first_entry(&cwq->delayed_works,
|
||||
struct work_struct, entry);
|
||||
|
||||
cwq_activate_delayed_work(work);
|
||||
}
|
||||
|
||||
/**
|
||||
* cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
|
||||
* @cwq: cwq of interest
|
||||
|
@ -1106,6 +1113,18 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
|
|||
smp_rmb();
|
||||
if (gcwq == get_work_gcwq(work)) {
|
||||
debug_work_deactivate(work);
|
||||
|
||||
/*
|
||||
* A delayed work item cannot be grabbed directly
|
||||
* because it might have linked NO_COLOR work items
|
||||
* which, if left on the delayed_works list, will confuse
|
||||
* cwq->nr_active management later on and cause
|
||||
* stall. Make sure the work item is activated
|
||||
* before grabbing.
|
||||
*/
|
||||
if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
|
||||
cwq_activate_delayed_work(work);
|
||||
|
||||
list_del_init(&work->entry);
|
||||
cwq_dec_nr_in_flight(get_work_cwq(work),
|
||||
get_work_color(work),
|
||||
|
|
Loading…
Reference in New Issue