Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - Move the nohz kick code out of the scheduler tick to a dedicated IPI,
   from Frederic Weisbecker.

   This necessitated a fair amount of background infrastructure rework,
   including (a short usage sketch follows the list):

   * Clean up some irq-work internals
   * Implement remote irq-work
   * Implement nohz kick on top of remote irq-work
   * Move full dynticks timer enqueue notification to new kick
   * Move multi-task notification to new kick
   * Remove unnecessary barriers on multi-task notification
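
   Roughly, the new remote queueing primitive is used like this (just an
   illustrative sketch based on the include/linux/irq_work.h and
   kernel/irq_work.c hunks further down; the callback and variable names
   here are made up, not part of this series):

	/* hypothetical callback, runs from the irq_work IPI on the target CPU */
	static void my_irq_work_func(struct irq_work *work)
	{
		/* ... */
	}
	static DEFINE_IRQ_WORK(my_irq_work, my_irq_work_func);

	/* queue on a remote CPU (SMP only); returns false if already pending */
	irq_work_queue_on(&my_irq_work, target_cpu);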

 - Remove proliferation of wait_on_bit() action functions and allow
   wait_on_bit_action() functions to support a timeout.  (Neil Brown)
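
   For callers this mostly means dropping the open-coded sleep helper;
   roughly (taken from the dm-bufio and wait.h hunks below; do_io_schedule
   is one of the removed helpers, my_action is just a placeholder):

	/* before: each caller passed its own action function */
	wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);

	/* after: use the stock variants (schedule() vs. io_schedule()) */
	wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);

	/* callers that still need a custom action use the _action form */
	wait_on_bit_action(word, bit, my_action, TASK_KILLABLE);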

 - Another round of sched/numa improvements, cleanups and fixes.  (Rik
   van Riel)

 - Implement fast idling of CPUs when the system is partially loaded,
   for better scalability.  (Tim Chen)

 - Restructure and fix the CPU hotplug handling code that may leave
   cfs_rq and rt_rq's throttled when tasks are migrated away from a dead
   cpu.  (Kirill Tkhai)

 - Robustify the sched topology setup code.  (Peter Zijlstra)

 - Improve sched_feat() handling wrt.  static_keys (Jason Baron)

 - Misc fixes.

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  sched/fair: Fix 'make xmldocs' warning caused by missing description
  sched: Use macro for magic number of -1 for setparam
  sched: Robustify topology setup
  sched: Fix sched_setparam() policy == -1 logic
  sched: Allow wait_on_bit_action() functions to support a timeout
  sched: Remove proliferation of wait_on_bit() action functions
  sched/numa: Revert "Use effective_load() to balance NUMA loads"
  sched: Fix static_key race with sched_feat()
  sched: Remove extra static_key*() function indirection
  sched/rt: Fix replenish_dl_entity() comments to match the current upstream code
  sched: Transform resched_task() into resched_curr()
  sched/deadline: Kill task_struct->pi_top_task
  sched: Rework check_for_tasks()
  sched/rt: Enqueue just unthrottled rt_rq back on the stack in __disable_runtime()
  sched/fair: Disable runtime_enabled on dying rq
  sched/numa: Change scan period code to match intent
  sched/numa: Rework best node setting in task_numa_migrate()
  sched/numa: Examine a task move when examining a task swap
  sched/numa: Simplify task_numa_compare()
  sched/numa: Use effective_load() to balance NUMA loads
  ...
commit 98959948a7 (Linus Torvalds, 2014-08-04 16:23:30 -07:00)
57 changed files with 591 additions and 563 deletions

@@ -90,7 +90,7 @@ operations:
     to be cleared before proceeding:

	wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
-		    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+		    TASK_UNINTERRUPTIBLE);

 (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it

@@ -1515,7 +1515,7 @@ Doing the same with chrt -r 5 and function-trace set.
 <idle>-0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep
 <idle>-0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up
 <idle>-0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup
-<idle>-0 3d.h3 3us : resched_task <-check_preempt_curr
+<idle>-0 3d.h3 3us : resched_curr <-check_preempt_curr
 <idle>-0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup
 <idle>-0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up
 <idle>-0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock

@@ -614,16 +614,6 @@ static void write_endio(struct bio *bio, int error)
        wake_up_bit(&b->state, B_WRITING);
 }

-/*
- * This function is called when wait_on_bit is actually waiting.
- */
-static int do_io_schedule(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 /*
  * Initiate a write on a dirty buffer, but don't wait for it.
  *
@@ -640,8 +630,7 @@ static void __write_dirty_buffer(struct dm_buffer *b,
                return;

        clear_bit(B_DIRTY, &b->state);
-       wait_on_bit_lock(&b->state, B_WRITING,
-                        do_io_schedule, TASK_UNINTERRUPTIBLE);
+       wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);

        if (!write_list)
                submit_io(b, WRITE, b->block, write_endio);
@@ -675,9 +664,9 @@ static void __make_buffer_clean(struct dm_buffer *b)
        if (!b->state)  /* fast case */
                return;

-       wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+       wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
        __write_dirty_buffer(b, NULL);
-       wait_on_bit(&b->state, B_WRITING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+       wait_on_bit_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 }
@@ -1030,7 +1019,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
        if (need_submit)
                submit_io(b, READ, b->block, read_endio);

-       wait_on_bit(&b->state, B_READING, do_io_schedule, TASK_UNINTERRUPTIBLE);
+       wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);

        if (b->read_error) {
                int error = b->read_error;
@@ -1209,15 +1198,13 @@ again:
                        dropped_lock = 1;
                        b->hold_count++;
                        dm_bufio_unlock(c);
-                       wait_on_bit(&b->state, B_WRITING,
-                                   do_io_schedule, TASK_UNINTERRUPTIBLE);
+                       wait_on_bit_io(&b->state, B_WRITING,
+                                      TASK_UNINTERRUPTIBLE);
                        dm_bufio_lock(c);
                        b->hold_count--;
                } else
-                       wait_on_bit(&b->state, B_WRITING,
-                                   do_io_schedule, TASK_UNINTERRUPTIBLE);
+                       wait_on_bit_io(&b->state, B_WRITING,
+                                      TASK_UNINTERRUPTIBLE);
        }

        if (!test_bit(B_DIRTY, &b->state) &&
@@ -1321,15 +1308,15 @@ retry:
        __write_dirty_buffer(b, NULL);
        if (b->hold_count == 1) {
-               wait_on_bit(&b->state, B_WRITING,
-                           do_io_schedule, TASK_UNINTERRUPTIBLE);
+               wait_on_bit_io(&b->state, B_WRITING,
+                              TASK_UNINTERRUPTIBLE);
                set_bit(B_DIRTY, &b->state);
                __unlink_buffer(b);
                __link_buffer(b, new_block, LIST_DIRTY);
        } else {
                sector_t old_block;
-               wait_on_bit_lock(&b->state, B_WRITING,
-                                do_io_schedule, TASK_UNINTERRUPTIBLE);
+               wait_on_bit_lock_io(&b->state, B_WRITING,
+                                   TASK_UNINTERRUPTIBLE);
                /*
                 * Relink buffer to "new_block" so that write_callback
                 * sees "new_block" as a block number.
@@ -1341,8 +1328,8 @@ retry:
                __unlink_buffer(b);
                __link_buffer(b, new_block, b->list_mode);
                submit_io(b, WRITE, new_block, write_endio);
-               wait_on_bit(&b->state, B_WRITING,
-                           do_io_schedule, TASK_UNINTERRUPTIBLE);
+               wait_on_bit_io(&b->state, B_WRITING,
+                              TASK_UNINTERRUPTIBLE);
                __unlink_buffer(b);
                __link_buffer(b, old_block, b->list_mode);
        }

@@ -1032,21 +1032,13 @@ static void start_merge(struct dm_snapshot *s)
                snapshot_merge_next_chunks(s);
 }

-static int wait_schedule(void *ptr)
-{
-        schedule();
-        return 0;
-}
-
 /*
  * Stop the merging process and wait until it finishes.
  */
 static void stop_merge(struct dm_snapshot *s)
 {
        set_bit(SHUTDOWN_MERGE, &s->state_bits);
-       wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule,
-                   TASK_UNINTERRUPTIBLE);
+       wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
        clear_bit(SHUTDOWN_MERGE, &s->state_bits);
 }

@@ -253,13 +253,6 @@ static int dvb_usbv2_adapter_stream_exit(struct dvb_usb_adapter *adap)
        return usb_urb_exitv2(&adap->stream);
 }

-static int wait_schedule(void *ptr)
-{
-        schedule();
-        return 0;
-}
-
 static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed)
 {
        struct dvb_usb_adapter *adap = dvbdmxfeed->demux->priv;
@@ -273,8 +266,7 @@ static int dvb_usb_start_feed(struct dvb_demux_feed *dvbdmxfeed)
                        dvbdmxfeed->pid, dvbdmxfeed->index);

        /* wait init is done */
-       wait_on_bit(&adap->state_bits, ADAP_INIT, wait_schedule,
-                   TASK_UNINTERRUPTIBLE);
+       wait_on_bit(&adap->state_bits, ADAP_INIT, TASK_UNINTERRUPTIBLE);

        if (adap->active_fe == -1)
                return -EINVAL;
@@ -568,7 +560,7 @@ static int dvb_usb_fe_sleep(struct dvb_frontend *fe)
        if (!adap->suspend_resume_active) {
                set_bit(ADAP_SLEEP, &adap->state_bits);
-               wait_on_bit(&adap->state_bits, ADAP_STREAMING, wait_schedule,
+               wait_on_bit(&adap->state_bits, ADAP_STREAMING,
                            TASK_UNINTERRUPTIBLE);
        }

@@ -3437,16 +3437,10 @@ done_unlocked:
        return 0;
 }

-static int eb_wait(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 {
-       wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
-                   TASK_UNINTERRUPTIBLE);
+       wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
+                      TASK_UNINTERRUPTIBLE);
 }

 static noinline_for_stack int

@@ -61,16 +61,9 @@ inline void touch_buffer(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(touch_buffer);

-static int sleep_on_buffer(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 void __lock_buffer(struct buffer_head *bh)
 {
-       wait_on_bit_lock(&bh->b_state, BH_Lock, sleep_on_buffer,
-                        TASK_UNINTERRUPTIBLE);
+       wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_buffer);
@@ -123,7 +116,7 @@ EXPORT_SYMBOL(buffer_check_dirty_writeback);
  */
 void __wait_on_buffer(struct buffer_head * bh)
 {
-       wait_on_bit(&bh->b_state, BH_Lock, sleep_on_buffer, TASK_UNINTERRUPTIBLE);
+       wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__wait_on_buffer);

@@ -3934,13 +3934,6 @@ cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb)
        return tlink_tcon(cifs_sb_master_tlink(cifs_sb));
 }

-static int
-cifs_sb_tcon_pending_wait(void *unused)
-{
-        schedule();
-        return signal_pending(current) ? -ERESTARTSYS : 0;
-}
-
 /* find and return a tlink with given uid */
 static struct tcon_link *
 tlink_rb_search(struct rb_root *root, kuid_t uid)
@@ -4039,11 +4032,10 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
        } else {
 wait_for_construction:
                ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
-                                 cifs_sb_tcon_pending_wait,
                                  TASK_INTERRUPTIBLE);
                if (ret) {
                        cifs_put_tlink(tlink);
-                       return ERR_PTR(ret);
+                       return ERR_PTR(-ERESTARTSYS);
                }

                /* if it's good, return it */

@@ -3618,13 +3618,6 @@ static int cifs_launder_page(struct page *page)
        return rc;
 }

-static int
-cifs_pending_writers_wait(void *unused)
-{
-        schedule();
-        return 0;
-}
-
 void cifs_oplock_break(struct work_struct *work)
 {
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3636,7 +3629,7 @@ void cifs_oplock_break(struct work_struct *work)
        int rc = 0;

        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
-                       cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
+                       TASK_UNINTERRUPTIBLE);

        server->ops->downgrade_oplock(server, cinode,
                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

@@ -1780,7 +1780,7 @@ cifs_invalidate_mapping(struct inode *inode)
  * @word: long word containing the bit lock
  */
 static int
-cifs_wait_bit_killable(void *word)
+cifs_wait_bit_killable(struct wait_bit_key *key)
 {
        if (fatal_signal_pending(current))
                return -ERESTARTSYS;
@@ -1794,8 +1794,8 @@ cifs_revalidate_mapping(struct inode *inode)
        int rc;
        unsigned long *flags = &CIFS_I(inode)->flags;

-       rc = wait_on_bit_lock(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
-                             TASK_KILLABLE);
+       rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
+                                    TASK_KILLABLE);
        if (rc)
                return rc;

@@ -582,7 +582,7 @@ int cifs_get_writer(struct cifsInodeInfo *cinode)
 start:
        rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
-                        cifs_oplock_break_wait, TASK_KILLABLE);
+                        TASK_KILLABLE);
        if (rc)
                return rc;

@@ -342,7 +342,8 @@ static void __inode_wait_for_writeback(struct inode *inode)
        wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
        while (inode->i_state & I_SYNC) {
                spin_unlock(&inode->i_lock);
-               __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
+               __wait_on_bit(wqh, &wq, bit_wait,
+                             TASK_UNINTERRUPTIBLE);
                spin_lock(&inode->i_lock);
        }
 }

@@ -160,7 +160,7 @@ void __fscache_enable_cookie(struct fscache_cookie *cookie,
        _enter("%p", cookie);

        wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
-                        fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                        TASK_UNINTERRUPTIBLE);

        if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
                goto out_unlock;
@@ -255,7 +255,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
        if (!fscache_defer_lookup) {
                _debug("non-deferred lookup %p", &cookie->flags);
                wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
-                           fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                           TASK_UNINTERRUPTIBLE);
                _debug("complete");
                if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
                        goto unavailable;
@@ -463,7 +463,6 @@ void __fscache_wait_on_invalidate(struct fscache_cookie *cookie)
        _enter("%p", cookie);

        wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING,
-                   fscache_wait_bit_interruptible,
                    TASK_UNINTERRUPTIBLE);

        _leave("");
@@ -525,7 +524,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
        }

        wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
-                        fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                        TASK_UNINTERRUPTIBLE);

        if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
                goto out_unlock_enable;

@@ -97,8 +97,6 @@ static inline bool fscache_object_congested(void)
        return workqueue_congested(WORK_CPU_UNBOUND, fscache_object_wq);
 }

-extern int fscache_wait_bit(void *);
-extern int fscache_wait_bit_interruptible(void *);
 extern int fscache_wait_atomic_t(atomic_t *);

 /*

@@ -196,24 +196,6 @@ static void __exit fscache_exit(void)
 module_exit(fscache_exit);

-/*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-int fscache_wait_bit(void *flags)
-{
-        schedule();
-        return 0;
-}
-
-/*
- * wait_on_bit() sleep function for interruptible waiting
- */
-int fscache_wait_bit_interruptible(void *flags)
-{
-        schedule();
-        return signal_pending(current);
-}
-
 /*
  * wait_on_atomic_t() sleep function for uninterruptible waiting
  */

@@ -298,7 +298,6 @@ int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
        jif = jiffies;
        if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
-                       fscache_wait_bit_interruptible,
                        TASK_INTERRUPTIBLE) != 0) {
                fscache_stat(&fscache_n_retrievals_intr);
                _leave(" = -ERESTARTSYS");
@@ -342,7 +341,6 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
                if (stat_op_waits)
                        fscache_stat(stat_op_waits);
                if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
-                               fscache_wait_bit_interruptible,
                                TASK_INTERRUPTIBLE) != 0) {
                        ret = fscache_cancel_op(op, do_cancel);
                        if (ret == 0)
@@ -351,7 +349,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
                /* it's been removed from the pending queue by another party,
                 * so we should get to run shortly */
                wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
-                           fscache_wait_bit, TASK_UNINTERRUPTIBLE);
+                           TASK_UNINTERRUPTIBLE);
        }
        _debug("<<< GO");

@@ -855,27 +855,6 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
        gh->gh_ip = 0;
 }

-/**
- * gfs2_glock_holder_wait
- * @word: unused
- *
- * This function and gfs2_glock_demote_wait both show up in the WCHAN
- * field. Thus I've separated these otherwise identical functions in
- * order to be more informative to the user.
- */
-
-static int gfs2_glock_holder_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
-static int gfs2_glock_demote_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
 /**
  * gfs2_glock_wait - wait on a glock acquisition
  * @gh: the glock holder
@@ -888,7 +867,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
        unsigned long time1 = jiffies;

        might_sleep();
-       wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
+       wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
        if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
                /* Lengthen the minimum hold time. */
                gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
@@ -1128,7 +1107,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
        struct gfs2_glock *gl = gh->gh_gl;

        gfs2_glock_dq(gh);
        might_sleep();
-       wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE);
+       wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
 }

 /**

@@ -936,12 +936,6 @@ fail:
        return error;
 }

-static int dlm_recovery_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
 static int control_first_done(struct gfs2_sbd *sdp)
 {
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -976,7 +970,7 @@ restart:
                fs_info(sdp, "control_first_done wait gen %u\n", start_gen);

                wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
-                           dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+                           TASK_UNINTERRUPTIBLE);
                goto restart;
        }

@@ -1024,20 +1024,13 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
                lm->lm_unmount(sdp);
 }

-static int gfs2_journalid_wait(void *word)
-{
-        if (signal_pending(current))
-                return -EINTR;
-        schedule();
-        return 0;
-}
-
 static int wait_on_journal(struct gfs2_sbd *sdp)
 {
        if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
                return 0;

-       return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, gfs2_journalid_wait, TASK_INTERRUPTIBLE);
+       return wait_on_bit(&sdp->sd_flags, SDF_NOJOURNALID, TASK_INTERRUPTIBLE)
+               ? -EINTR : 0;
 }

 void gfs2_online_uevent(struct gfs2_sbd *sdp)

@@ -591,12 +591,6 @@ done:
        wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
 }

-static int gfs2_recovery_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
 int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
 {
        int rv;
@@ -609,7 +603,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
        BUG_ON(!rv);

        if (wait)
-               wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
+               wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
                            TASK_UNINTERRUPTIBLE);

        return wait ? jd->jd_recover_error : 0;

@@ -864,12 +864,6 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
        return error;
 }

-static int gfs2_umount_recovery_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-
 /**
  * gfs2_put_super - Unmount the filesystem
  * @sb: The VFS superblock
@@ -894,7 +888,7 @@ restart:
                        continue;
                spin_unlock(&sdp->sd_jindex_spin);
                wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
-                           gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE);
+                           TASK_UNINTERRUPTIBLE);
                goto restart;
        }
        spin_unlock(&sdp->sd_jindex_spin);

@@ -1695,13 +1695,6 @@ int inode_needs_sync(struct inode *inode)
 }
 EXPORT_SYMBOL(inode_needs_sync);

-int inode_wait(void *word)
-{
-        schedule();
-        return 0;
-}
-EXPORT_SYMBOL(inode_wait);
-
 /*
  * If we try to find an inode in the inode hash while it is being
  * deleted, we have to wait until the filesystem completes its

@@ -763,12 +763,6 @@ static void warn_dirty_buffer(struct buffer_head *bh)
               bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }

-static int sleep_on_shadow_bh(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 /*
  * If the buffer is already part of the current transaction, then there
  * is nothing we need to do. If it is already part of a prior
@@ -906,8 +900,8 @@ repeat:
                if (buffer_shadow(bh)) {
                        JBUFFER_TRACE(jh, "on shadow: sleep");
                        jbd_unlock_bh_state(bh);
-                       wait_on_bit(&bh->b_state, BH_Shadow,
-                                   sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
+                       wait_on_bit_io(&bh->b_state, BH_Shadow,
+                                      TASK_UNINTERRUPTIBLE);
                        goto repeat;
                }

@@ -361,8 +361,8 @@ start:
         * Prevent starvation issues if someone is doing a consistency
         * sync-to-disk
         */
-       ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
-                         nfs_wait_bit_killable, TASK_KILLABLE);
+       ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+                                nfs_wait_bit_killable, TASK_KILLABLE);
        if (ret)
                return ret;

@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
 {
        might_sleep();
-       wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
-                   nfs_wait_bit_killable, TASK_KILLABLE);
+       wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
+                          nfs_wait_bit_killable, TASK_KILLABLE);
 }

 static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)

@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
  * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
  * @word: long word containing the bit lock
  */
-int nfs_wait_bit_killable(void *word)
+int nfs_wait_bit_killable(struct wait_bit_key *key)
 {
        if (fatal_signal_pending(current))
                return -ERESTARTSYS;
@@ -1074,8 +1074,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
         * the bit lock here if it looks like we're going to be doing that.
         */
        for (;;) {
-               ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
-                                 nfs_wait_bit_killable, TASK_KILLABLE);
+               ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
+                                        nfs_wait_bit_killable, TASK_KILLABLE);
                if (ret)
                        goto out;
                spin_lock(&inode->i_lock);

@@ -348,7 +348,7 @@ extern int nfs_drop_inode(struct inode *);
 extern void nfs_clear_inode(struct inode *);
 extern void nfs_evict_inode(struct inode *);
 void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(void *word);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key);

 /* super.c */
 extern const struct super_operations nfs_sops;

@@ -1251,8 +1251,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
        might_sleep();

        atomic_inc(&clp->cl_count);
-       res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
-                         nfs_wait_bit_killable, TASK_KILLABLE);
+       res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
+                                nfs_wait_bit_killable, TASK_KILLABLE);
        if (res)
                goto out;
        if (clp->cl_cons_state < 0)

@@ -115,7 +115,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
                set_bit(NFS_IO_INPROGRESS, &c->flags);
                if (atomic_read(&c->io_count) == 0)
                        break;
-               ret = nfs_wait_bit_killable(&c->flags);
+               ret = nfs_wait_bit_killable(&q.key);
        } while (atomic_read(&c->io_count) != 0);
        finish_wait(wq, &q.wait);
        return ret;
@@ -136,12 +136,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
        return __nfs_iocounter_wait(c);
 }

-static int nfs_wait_bit_uninterruptible(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 /*
  * nfs_page_group_lock - lock the head of the page group
  * @req - request in group that is to be locked
@@ -156,7 +150,6 @@ nfs_page_group_lock(struct nfs_page *req)
        WARN_ON_ONCE(head != head->wb_head);

        wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
-                       nfs_wait_bit_uninterruptible,
                        TASK_UNINTERRUPTIBLE);
 }
@@ -435,9 +428,8 @@ void nfs_release_request(struct nfs_page *req)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-       return wait_on_bit(&req->wb_flags, PG_BUSY,
-                       nfs_wait_bit_uninterruptible,
-                       TASK_UNINTERRUPTIBLE);
+       return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+                             TASK_UNINTERRUPTIBLE);
 }

 /*

@@ -1885,7 +1885,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
        if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
                if (!sync)
                        goto out;
-               status = wait_on_bit_lock(&nfsi->flags,
+               status = wait_on_bit_lock_action(&nfsi->flags,
                                NFS_INO_LAYOUTCOMMITTING,
                                nfs_wait_bit_killable,
                                TASK_KILLABLE);

@@ -623,7 +623,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
        int err;

        /* Stop dirtying of new pages while we sync */
-       err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
+       err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
                        nfs_wait_bit_killable, TASK_KILLABLE);
        if (err)
                goto out_err;
@@ -1703,7 +1703,7 @@ int nfs_commit_inode(struct inode *inode, int how)
                return error;
        if (!may_wait)
                goto out_mark_dirty;
-       error = wait_on_bit(&NFS_I(inode)->flags,
+       error = wait_on_bit_action(&NFS_I(inode)->flags,
                        NFS_INO_COMMIT,
                        nfs_wait_bit_killable,
                        TASK_KILLABLE);

@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), }

 bool irq_work_queue(struct irq_work *work);
+
+#ifdef CONFIG_SMP
+bool irq_work_queue_on(struct irq_work *work, int cpu);
+#endif
+
 void irq_work_run(void);
 void irq_work_sync(struct irq_work *work);

@@ -1437,8 +1437,6 @@ struct task_struct {
        struct rb_node *pi_waiters_leftmost;
        /* Deadlock detection and priority inheritance handling */
        struct rt_mutex_waiter *pi_blocked_on;
-       /* Top pi_waiters task */
-       struct task_struct *pi_top_task;
 #endif

 #ifdef CONFIG_DEBUG_MUTEXES
@@ -2782,7 +2780,7 @@ static inline bool __must_check current_set_polling_and_test(void)
        /*
         * Polling state must be visible before we test NEED_RESCHED,
-        * paired by resched_task()
+        * paired by resched_curr()
         */
        smp_mb__after_atomic();
@@ -2800,7 +2798,7 @@ static inline bool __must_check current_clr_polling_and_test(void)
        /*
         * Polling state must be visible before we test NEED_RESCHED,
-        * paired by resched_task()
+        * paired by resched_curr()
         */
        smp_mb__after_atomic();
@@ -2832,7 +2830,7 @@ static inline void current_clr_polling(void)
         * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
         * fold.
         */
-       smp_mb(); /* paired with resched_task() */
+       smp_mb(); /* paired with resched_curr() */

        preempt_fold_need_resched();
 }

@@ -236,7 +236,7 @@ void * rpc_malloc(struct rpc_task *, size_t);
 void           rpc_free(void *);
 int            rpciod_up(void);
 void           rpciod_down(void);
-int            __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
+int            __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *);
 #ifdef RPC_DEBUG
 struct net;
 void           rpc_show_tasks(struct net *);

@@ -183,7 +183,13 @@ static inline bool tick_nohz_full_cpu(int cpu)
 extern void tick_nohz_init(void);
 extern void __tick_nohz_full_check(void);
-extern void tick_nohz_full_kick(void);
+extern void tick_nohz_full_kick_cpu(int cpu);
+
+static inline void tick_nohz_full_kick(void)
+{
+       tick_nohz_full_kick_cpu(smp_processor_id());
+}
+
 extern void tick_nohz_full_kick_all(void);
 extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
@@ -191,6 +197,7 @@ static inline void tick_nohz_init(void) { }
 static inline bool tick_nohz_full_enabled(void) { return false; }
 static inline bool tick_nohz_full_cpu(int cpu) { return false; }
 static inline void __tick_nohz_full_check(void) { }
+static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
 static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }

@@ -25,6 +25,7 @@ struct wait_bit_key {
        void            *flags;
        int             bit_nr;
 #define WAIT_ATOMIC_T_BIT_NR   -1
+       unsigned long   private;
 };

 struct wait_bit_queue {
@@ -141,18 +142,19 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
        list_del(&old->task_list);
 }

+typedef int wait_bit_action_f(struct wait_bit_key *);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr);
 void __wake_up_bit(wait_queue_head_t *, void *, int);
-int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
-int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned);
+int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
+int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned);
 void wake_up_bit(void *, int);
 void wake_up_atomic_t(atomic_t *);
-int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned);
-int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned);
+int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned);
+int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned);
 int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned);
 wait_queue_head_t *bit_waitqueue(void *, int);
@@ -854,11 +856,14 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
                (wait)->flags = 0;                                      \
        } while (0)

+extern int bit_wait(struct wait_bit_key *);
+extern int bit_wait_io(struct wait_bit_key *);
+
 /**
  * wait_on_bit - wait for a bit to be cleared
  * @word: the word being waited on, a kernel virtual address
  * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
  * @mode: the task state to sleep in
  *
  * There is a standard hashed waitqueue table for generic use. This
@@ -867,9 +872,62 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
  * call wait_on_bit() in threads waiting for the bit to clear.
  * One uses wait_on_bit() where one is waiting for the bit to clear,
  * but has no intention of setting it.
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
  */
 static inline int
-wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode)
+wait_on_bit(void *word, int bit, unsigned mode)
+{
+       if (!test_bit(bit, word))
+               return 0;
+       return out_of_line_wait_on_bit(word, bit,
+                                      bit_wait,
+                                      mode);
+}
+
+/**
+ * wait_on_bit_io - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared.  This is similar to wait_on_bit(), but calls
+ * io_schedule() instead of schedule() for the actual waiting.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_io(void *word, int bit, unsigned mode)
+{
+       if (!test_bit(bit, word))
+               return 0;
+       return out_of_line_wait_on_bit(word, bit,
+                                      bit_wait_io,
+                                      mode);
+}
+
+/**
+ * wait_on_bit_action - wait for a bit to be cleared
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared, and allow the waiting action to be specified.
+ * This is like wait_on_bit() but allows fine control of how the waiting
+ * is done.
+ *
+ * Returned value will be zero if the bit was cleared, or non-zero
+ * if the process received a signal and the mode permitted wakeup
+ * on that signal.
+ */
+static inline int
+wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
 {
        if (!test_bit(bit, word))
                return 0;
@@ -880,7 +938,6 @@ wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode)
  * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
  * @word: the word being waited on, a kernel virtual address
  * @bit: the bit of the word being waited on
- * @action: the function used to sleep, which may take special actions
  * @mode: the task state to sleep in
  *
  * There is a standard hashed waitqueue table for generic use. This
@@ -891,9 +948,61 @@ wait_on_bit(void *word, int bit, int (*action)(void *), unsigned mode)
  * wait_on_bit() in threads waiting to be able to set the bit.
  * One uses wait_on_bit_lock() where one is waiting for the bit to
  * clear with the intention of setting it, and when done, clearing it.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
  */
 static inline int
-wait_on_bit_lock(void *word, int bit, int (*action)(void *), unsigned mode)
+wait_on_bit_lock(void *word, int bit, unsigned mode)
+{
+       if (!test_and_set_bit(bit, word))
+               return 0;
+       return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
+}
+
+/**
+ * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to atomically set it.  This is similar
+ * to wait_on_bit(), but calls io_schedule() instead of schedule()
+ * for the actual waiting.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_io(void *word, int bit, unsigned mode)
+{
+       if (!test_and_set_bit(bit, word))
+               return 0;
+       return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
+}
+
+/**
+ * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
+ * @word: the word being waited on, a kernel virtual address
+ * @bit: the bit of the word being waited on
+ * @action: the function used to sleep, which may take special actions
+ * @mode: the task state to sleep in
+ *
+ * Use the standard hashed waitqueue table to wait for a bit
+ * to be cleared and then to set it, and allow the waiting action
+ * to be specified.
+ * This is like wait_on_bit() but allows fine control of how the waiting
+ * is done.
+ *
+ * Returns zero if the bit was (eventually) found to be clear and was
+ * set.  Returns non-zero if a signal was delivered to the process and
+ * the @mode allows that signal to wake the process.
+ */
+static inline int
+wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
 {
        if (!test_and_set_bit(bit, word))
                return 0;

@@ -90,7 +90,6 @@ struct writeback_control {
  * fs/fs-writeback.c
  */
 struct bdi_writeback;
-int inode_wait(void *);
 void writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 void writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
                                                        enum wb_reason reason);
@@ -105,7 +104,7 @@ void inode_wait_for_writeback(struct inode *inode);
 static inline void wait_on_inode(struct inode *inode)
 {
        might_sleep();
-       wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE);
+       wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE);
 }

 /*

@@ -274,21 +274,28 @@ void clear_tasks_mm_cpumask(int cpu)
        rcu_read_unlock();
 }

-static inline void check_for_tasks(int cpu)
+static inline void check_for_tasks(int dead_cpu)
 {
-       struct task_struct *p;
-       cputime_t utime, stime;
+       struct task_struct *g, *p;

-       write_lock_irq(&tasklist_lock);
-       for_each_process(p) {
-               task_cputime(p, &utime, &stime);
-               if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
-                   (utime || stime))
-                       pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n",
-                               p->comm, task_pid_nr(p), cpu,
-                               p->state, p->flags);
-       }
-       write_unlock_irq(&tasklist_lock);
+       read_lock_irq(&tasklist_lock);
+       do_each_thread(g, p) {
+               if (!p->on_rq)
+                       continue;
+               /*
+                * We do the check with unlocked task_rq(p)->lock.
+                * Order the reading to do not warn about a task,
+                * which was running on this cpu in the past, and
+                * it's just been woken on another cpu.
+                */
+               rmb();
+               if (task_cpu(p) != dead_cpu)
+                       continue;
+               pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
+                       p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
+       } while_each_thread(g, p);
+       read_unlock_irq(&tasklist_lock);
 }

 struct take_cpu_down_param {

@@ -1095,7 +1095,6 @@ static void rt_mutex_init_task(struct task_struct *p)
        p->pi_waiters = RB_ROOT;
        p->pi_waiters_leftmost = NULL;
        p->pi_blocked_on = NULL;
-       p->pi_top_task = NULL;
 #endif
 }

@@ -16,11 +16,12 @@
 #include <linux/tick.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/smp.h>
 #include <asm/processor.h>

-static DEFINE_PER_CPU(struct llist_head, irq_work_list);
-static DEFINE_PER_CPU(int, irq_work_raised);
+static DEFINE_PER_CPU(struct llist_head, raised_list);
+static DEFINE_PER_CPU(struct llist_head, lazy_list);

 /*
  * Claim the entry so that no one else will poke at it.
@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void)
         */
 }

+#ifdef CONFIG_SMP
 /*
- * Enqueue the irq_work @entry unless it's already pending
+ * Enqueue the irq_work @work on @cpu unless it's already pending
  * somewhere.
  *
  * Can be re-enqueued while the callback is still in progress.
  */
+bool irq_work_queue_on(struct irq_work *work, int cpu)
+{
+       /* All work should have been flushed before going offline */
+       WARN_ON_ONCE(cpu_is_offline(cpu));
+
+       /* Arch remote IPI send/receive backend aren't NMI safe */
+       WARN_ON_ONCE(in_nmi());
+
+       /* Only queue if not already pending */
+       if (!irq_work_claim(work))
+               return false;
+
+       if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
+               arch_send_call_function_single_ipi(cpu);
+
+       return true;
+}
+EXPORT_SYMBOL_GPL(irq_work_queue_on);
+#endif
+
+/* Enqueue the irq work @work on the current CPU */
 bool irq_work_queue(struct irq_work *work)
 {
        /* Only queue if not already pending */
@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work)
        /* Queue the entry and raise the IPI if needed. */
        preempt_disable();

-       llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
-
-       /*
-        * If the work is not "lazy" or the tick is stopped, raise the irq
-        * work interrupt (if supported by the arch), otherwise, just wait
-        * for the next tick.
-        */
-       if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
-               if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+       /* If the work is "lazy", handle it from next tick if any */
+       if (work->flags & IRQ_WORK_LAZY) {
+               if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) &&
+                   tick_nohz_tick_stopped())
+                       arch_irq_work_raise();
+       } else {
+               if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
                        arch_irq_work_raise();
        }
@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
 bool irq_work_needs_cpu(void)
 {
-       struct llist_head *this_list;
+       struct llist_head *raised, *lazy;

-       this_list = &__get_cpu_var(irq_work_list);
-       if (llist_empty(this_list))
+       raised = &__get_cpu_var(raised_list);
+       lazy = &__get_cpu_var(lazy_list);
+       if (llist_empty(raised) && llist_empty(lazy))
                return false;

        /* All work should have been flushed before going offline */
@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void)
        return true;
 }

-static void __irq_work_run(void)
+static void irq_work_run_list(struct llist_head *list)
 {
        unsigned long flags;
        struct irq_work *work;
-       struct llist_head *this_list;
        struct llist_node *llnode;

-       /*
-        * Reset the "raised" state right before we check the list because
-        * an NMI may enqueue after we find the list empty from the runner.
-        */
-       __this_cpu_write(irq_work_raised, 0);
-       barrier();
-
-       this_list = &__get_cpu_var(irq_work_list);
-       if (llist_empty(this_list))
-               return;
-
        BUG_ON(!irqs_disabled());

-       llnode = llist_del_all(this_list);
+       if (llist_empty(list))
+               return;
+
+       llnode = llist_del_all(list);
        while (llnode != NULL) {
                work = llist_entry(llnode, struct irq_work, llnode);
@@ -149,13 +161,13 @@ static void __irq_work_run(void)
 }

 /*
- * Run the irq_work entries on this cpu. Requires to be ran from hardirq
- * context with local IRQs disabled.
+ * hotplug calls this through:
+ *   hotplug_cfd() -> flush_smp_call_function_queue()
  */
 void irq_work_run(void)
 {
-       BUG_ON(!in_irq());
-       __irq_work_run();
+       irq_work_run_list(&__get_cpu_var(raised_list));
+       irq_work_run_list(&__get_cpu_var(lazy_list));
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
@@ -171,35 +183,3 @@ void irq_work_sync(struct irq_work *work)
                cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int irq_work_cpu_notify(struct notifier_block *self,
-                              unsigned long action, void *hcpu)
-{
-       long cpu = (long)hcpu;
-
-       switch (action) {
-       case CPU_DYING:
-               /* Called from stop_machine */
-               if (WARN_ON_ONCE(cpu != smp_processor_id()))
-                       break;
-               __irq_work_run();
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block cpu_notify;
-
-static __init int irq_work_init_cpu_notifier(void)
-{
-       cpu_notify.notifier_call = irq_work_cpu_notify;
-       cpu_notify.priority = 0;
-       register_cpu_notifier(&cpu_notify);
-       return 0;
-}
-device_initcall(irq_work_init_cpu_notifier);
-
-#endif /* CONFIG_HOTPLUG_CPU */

@@ -28,12 +28,6 @@
 #include <linux/compat.h>

-static int ptrace_trapping_sleep_fn(void *flags)
-{
-        schedule();
-        return 0;
-}
-
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
@@ -371,7 +365,7 @@ unlock_creds:
 out:
        if (!retval) {
                wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT,
-                           ptrace_trapping_sleep_fn, TASK_UNINTERRUPTIBLE);
+                           TASK_UNINTERRUPTIBLE);
                proc_ptrace_connector(task, PTRACE_ATTACH);
        }

View File

@ -139,6 +139,8 @@ void update_rq_clock(struct rq *rq)
return; return;
delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
if (delta < 0)
return;
rq->clock += delta; rq->clock += delta;
update_rq_clock_task(rq, delta); update_rq_clock_task(rq, delta);
} }
@ -243,6 +245,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
char buf[64]; char buf[64];
char *cmp; char *cmp;
int i; int i;
struct inode *inode;
if (cnt > 63) if (cnt > 63)
cnt = 63; cnt = 63;
@ -253,7 +256,11 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
buf[cnt] = 0; buf[cnt] = 0;
cmp = strstrip(buf); cmp = strstrip(buf);
/* Ensure the static_key remains in a consistent state */
inode = file_inode(filp);
mutex_lock(&inode->i_mutex);
i = sched_feat_set(cmp); i = sched_feat_set(cmp);
mutex_unlock(&inode->i_mutex);
if (i == __SCHED_FEAT_NR) if (i == __SCHED_FEAT_NR)
return -EINVAL; return -EINVAL;
@ -587,30 +594,31 @@ static bool set_nr_if_polling(struct task_struct *p)
#endif #endif
/* /*
* resched_task - mark a task 'to be rescheduled now'. * resched_curr - mark rq's current task 'to be rescheduled now'.
* *
* On UP this means the setting of the need_resched flag, on SMP it * On UP this means the setting of the need_resched flag, on SMP it
* might also involve a cross-CPU call to trigger the scheduler on * might also involve a cross-CPU call to trigger the scheduler on
* the target CPU. * the target CPU.
*/ */
void resched_task(struct task_struct *p) void resched_curr(struct rq *rq)
{ {
struct task_struct *curr = rq->curr;
int cpu; int cpu;
lockdep_assert_held(&task_rq(p)->lock); lockdep_assert_held(&rq->lock);
if (test_tsk_need_resched(p)) if (test_tsk_need_resched(curr))
return; return;
cpu = task_cpu(p); cpu = cpu_of(rq);
if (cpu == smp_processor_id()) { if (cpu == smp_processor_id()) {
set_tsk_need_resched(p); set_tsk_need_resched(curr);
set_preempt_need_resched(); set_preempt_need_resched();
return; return;
} }
if (set_nr_and_not_polling(p)) if (set_nr_and_not_polling(curr))
smp_send_reschedule(cpu); smp_send_reschedule(cpu);
else else
trace_sched_wake_idle_without_ipi(cpu); trace_sched_wake_idle_without_ipi(cpu);
@ -623,7 +631,7 @@ void resched_cpu(int cpu)
if (!raw_spin_trylock_irqsave(&rq->lock, flags)) if (!raw_spin_trylock_irqsave(&rq->lock, flags))
return; return;
resched_task(cpu_curr(cpu)); resched_curr(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags); raw_spin_unlock_irqrestore(&rq->lock, flags);
} }
@ -684,10 +692,16 @@ static void wake_up_idle_cpu(int cpu)
static bool wake_up_full_nohz_cpu(int cpu) static bool wake_up_full_nohz_cpu(int cpu)
{ {
/*
* We just need the target to call irq_exit() and re-evaluate
* the next tick. The nohz full kick at least implies that.
* If needed we can still optimize that later with an
* empty IRQ.
*/
if (tick_nohz_full_cpu(cpu)) { if (tick_nohz_full_cpu(cpu)) {
if (cpu != smp_processor_id() || if (cpu != smp_processor_id() ||
tick_nohz_tick_stopped()) tick_nohz_tick_stopped())
smp_send_reschedule(cpu); tick_nohz_full_kick_cpu(cpu);
return true; return true;
} }
@ -730,18 +744,15 @@ static inline bool got_nohz_idle_kick(void)
#ifdef CONFIG_NO_HZ_FULL #ifdef CONFIG_NO_HZ_FULL
bool sched_can_stop_tick(void) bool sched_can_stop_tick(void)
{ {
struct rq *rq; /*
* More than one running task need preemption.
* nr_running update is assumed to be visible
* after IPI is sent from wakers.
*/
if (this_rq()->nr_running > 1)
return false;
rq = this_rq(); return true;
/* Make sure rq->nr_running update is visible after the IPI */
smp_rmb();
/* More than one running task need preemption */
if (rq->nr_running > 1)
return false;
return true;
} }
#endif /* CONFIG_NO_HZ_FULL */ #endif /* CONFIG_NO_HZ_FULL */
@ -1022,7 +1033,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
if (class == rq->curr->sched_class) if (class == rq->curr->sched_class)
break; break;
if (class == p->sched_class) { if (class == p->sched_class) {
resched_task(rq->curr); resched_curr(rq);
break; break;
} }
} }
@ -1568,9 +1579,7 @@ void scheduler_ipi(void)
*/ */
preempt_fold_need_resched(); preempt_fold_need_resched();
if (llist_empty(&this_rq()->wake_list) if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
&& !tick_nohz_full_cpu(smp_processor_id())
&& !got_nohz_idle_kick())
return; return;
/* /*
@ -1587,7 +1596,6 @@ void scheduler_ipi(void)
* somewhat pessimize the simple resched case. * somewhat pessimize the simple resched case.
*/ */
irq_enter(); irq_enter();
tick_nohz_full_check();
sched_ttwu_pending(); sched_ttwu_pending();
/* /*
@@ -2431,7 +2439,12 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
 {
 u64 ns = 0;
-if (task_current(rq, p)) {
+/*
+ * Must be ->curr _and_ ->on_rq. If dequeued, we would
+ * project cycles that may never be accounted to this
+ * thread, breaking clock_gettime().
+ */
+if (task_current(rq, p) && p->on_rq) {
 update_rq_clock(rq);
 ns = rq_clock_task(rq) - p->se.exec_start;
 if ((s64)ns < 0)
@@ -2474,8 +2487,10 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 * If we race with it leaving cpu, we'll take a lock. So we're correct.
 * If we race with it entering cpu, unaccounted time is 0. This is
 * indistinguishable from the read occurring a few cycles earlier.
+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has
+ * been accounted, so we're correct here as well.
 */
-if (!p->on_cpu)
+if (!p->on_cpu || !p->on_rq)
 return p->se.sum_exec_runtime;
 #endif
@@ -2971,7 +2986,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 }
 trace_sched_pi_setprio(p, prio);
-p->pi_top_task = rt_mutex_get_top_task(p);
 oldprio = p->prio;
 prev_class = p->sched_class;
 on_rq = p->on_rq;
@@ -2991,8 +3005,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 * running task
 */
 if (dl_prio(prio)) {
-if (!dl_prio(p->normal_prio) || (p->pi_top_task &&
-dl_entity_preempt(&p->pi_top_task->dl, &p->dl))) {
+struct task_struct *pi_task = rt_mutex_get_top_task(p);
+if (!dl_prio(p->normal_prio) ||
+(pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
 p->dl.dl_boosted = 1;
 p->dl.dl_throttled = 0;
 enqueue_flag = ENQUEUE_REPLENISH;
@@ -3064,7 +3079,7 @@ void set_user_nice(struct task_struct *p, long nice)
 * lowered its priority, then reschedule its CPU:
 */
 if (delta < 0 || (delta > 0 && task_running(rq, p)))
-resched_task(rq->curr);
+resched_curr(rq);
} }
out_unlock: out_unlock:
task_rq_unlock(rq, p, &flags); task_rq_unlock(rq, p, &flags);
@@ -3203,12 +3218,18 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 dl_se->dl_yielded = 0;
 }
+/*
+ * sched_setparam() passes in -1 for its policy, to let the functions
+ * it calls know not to change it.
+ */
+#define SETPARAM_POLICY -1
+
 static void __setscheduler_params(struct task_struct *p,
 const struct sched_attr *attr)
 {
 int policy = attr->sched_policy;
-if (policy == -1) /* setparam */
+if (policy == SETPARAM_POLICY)
 policy = p->policy;
 p->policy = policy;
@@ -3557,10 +3578,8 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
 .sched_nice = PRIO_TO_NICE(p->static_prio),
 };
-/*
- * Fixup the legacy SCHED_RESET_ON_FORK hack
- */
-if (policy & SCHED_RESET_ON_FORK) {
+/* Fixup the legacy SCHED_RESET_ON_FORK hack. */
+if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) {
 attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
 policy &= ~SCHED_RESET_ON_FORK;
 attr.sched_policy = policy;
@@ -3730,7 +3749,7 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
 */
 SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 {
-return do_sched_setscheduler(pid, -1, param);
+return do_sched_setscheduler(pid, SETPARAM_POLICY, param);
 }
/** /**
@@ -4285,7 +4304,7 @@ again:
 * fairness.
 */
 if (preempt && rq != p_rq)
-resched_task(p_rq->curr);
+resched_curr(p_rq);
} }
out_unlock: out_unlock:
@@ -6465,6 +6484,20 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 sched_domain_level_max = max(sched_domain_level_max, sd->level);
 child->parent = sd;
 sd->child = child;
+
+if (!cpumask_subset(sched_domain_span(child),
+sched_domain_span(sd))) {
+pr_err("BUG: arch topology borken\n");
+#ifdef CONFIG_SCHED_DEBUG
+pr_err(" the %s domain not a subset of the %s domain\n",
+child->name, sd->name);
+#endif
+/* Fixup, ensure @sd has at least @child cpus. */
+cpumask_or(sched_domain_span(sd),
+sched_domain_span(sd),
+sched_domain_span(child));
+}
 }
 set_domain_attribute(sd, attr);
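The new sanity check above reduces to a bitmask subset test followed by an OR fixup. A standalone sketch of that logic on plain unsigned masks (not the kernel's cpumask API, purely illustrative numbers):

/* Subset check and fixup on plain bitmasks; a stand-in for the
 * cpumask_subset()/cpumask_or() calls in the hunk above. */
#include <stdio.h>

int main(void)
{
	unsigned long child = 0x0f;	/* CPUs 0-3 */
	unsigned long parent = 0x0c;	/* CPUs 2-3: broken, child not a subset */

	if (child & ~parent) {
		printf("BUG: arch topology borken\n");
		parent |= child;	/* ensure the parent spans at least the child */
	}
	printf("parent span now 0x%lx\n", parent);
	return 0;
}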
@@ -7092,7 +7125,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
 __setscheduler(rq, p, &attr);
 if (on_rq) {
 enqueue_task(rq, p, 0);
-resched_task(rq->curr);
+resched_curr(rq);
} }
check_class_changed(rq, p, prev_class, old_prio); check_class_changed(rq, p, prev_class, old_prio);
@@ -7803,6 +7836,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 if (period > max_cfs_quota_period)
 return -EINVAL;
+
+/*
+ * Prevent race between setting of cfs_rq->runtime_enabled and
+ * unthrottle_offline_cfs_rqs().
+ */
+get_online_cpus();
 mutex_lock(&cfs_constraints_mutex);
 ret = __cfs_schedulable(tg, period, quota);
 if (ret)
@@ -7828,7 +7866,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 }
 raw_spin_unlock_irq(&cfs_b->lock);
-for_each_possible_cpu(i) {
+for_each_online_cpu(i) {
 struct cfs_rq *cfs_rq = tg->cfs_rq[i];
 struct rq *rq = cfs_rq->rq;
@@ -7844,6 +7882,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 cfs_bandwidth_usage_dec();
 out_unlock:
 mutex_unlock(&cfs_constraints_mutex);
+put_online_cpus();
 return ret;
 }


@@ -306,7 +306,7 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
 * the overrunning entity can't interfere with other entity in the system and
 * can't make them miss their deadlines. Reasons why this kind of overruns
 * could happen are, typically, a entity voluntarily trying to overcome its
-* runtime, or it just underestimated it during sched_setscheduler_ex().
+* runtime, or it just underestimated it during sched_setattr().
 */
 static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 struct sched_dl_entity *pi_se)
@ -535,7 +535,7 @@ again:
if (task_has_dl_policy(rq->curr)) if (task_has_dl_policy(rq->curr))
check_preempt_curr_dl(rq, p, 0); check_preempt_curr_dl(rq, p, 0);
else else
-resched_task(rq->curr);
+resched_curr(rq);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
* Queueing this task back might have overloaded rq, * Queueing this task back might have overloaded rq,
@ -634,7 +634,7 @@ static void update_curr_dl(struct rq *rq)
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
if (!is_leftmost(curr, &rq->dl)) if (!is_leftmost(curr, &rq->dl))
-resched_task(curr);
+resched_curr(rq);
} }
/* /*
@ -964,7 +964,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
cpudl_find(&rq->rd->cpudl, p, NULL) != -1) cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
return; return;
-resched_task(rq->curr);
+resched_curr(rq);
} }
static int pull_dl_task(struct rq *this_rq); static int pull_dl_task(struct rq *this_rq);
@ -979,7 +979,7 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
int flags) int flags)
{ {
if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
-resched_task(rq->curr);
+resched_curr(rq);
return; return;
} }
@ -1333,7 +1333,7 @@ retry:
if (dl_task(rq->curr) && if (dl_task(rq->curr) &&
dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
rq->curr->nr_cpus_allowed > 1) { rq->curr->nr_cpus_allowed > 1) {
-resched_task(rq->curr);
+resched_curr(rq);
return 0; return 0;
} }
@ -1373,7 +1373,7 @@ retry:
set_task_cpu(next_task, later_rq->cpu); set_task_cpu(next_task, later_rq->cpu);
activate_task(later_rq, next_task, 0); activate_task(later_rq, next_task, 0);
-resched_task(later_rq->curr);
+resched_curr(later_rq);
double_unlock_balance(rq, later_rq); double_unlock_balance(rq, later_rq);
@ -1632,14 +1632,14 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
*/ */
if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) && if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
rq->curr == p) rq->curr == p)
-resched_task(p);
+resched_curr(rq);
#else #else
/* /*
* Again, we don't know if p has a earlier * Again, we don't know if p has a earlier
* or later deadline, so let's blindly set a * or later deadline, so let's blindly set a
* (maybe not needed) rescheduling point. * (maybe not needed) rescheduling point.
*/ */
-resched_task(p);
+resched_curr(rq);
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
} else } else
switched_to_dl(rq, p); switched_to_dl(rq, p);


@@ -1062,7 +1062,6 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
 if (!cpus)
 return;
-ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity;
 ns->task_capacity =
 DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE);
 ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
@@ -1096,18 +1095,30 @@ static void task_numa_assign(struct task_numa_env *env,
 env->best_cpu = env->dst_cpu;
 }
-static bool load_too_imbalanced(long orig_src_load, long orig_dst_load,
-long src_load, long dst_load,
+static bool load_too_imbalanced(long src_load, long dst_load,
 struct task_numa_env *env)
 {
 long imb, old_imb;
+long orig_src_load, orig_dst_load;
+long src_capacity, dst_capacity;
+
+/*
+ * The load is corrected for the CPU capacity available on each node.
+ *
+ * src_load dst_load
+ * ------------ vs ---------
+ * src_capacity dst_capacity
+ */
+src_capacity = env->src_stats.compute_capacity;
+dst_capacity = env->dst_stats.compute_capacity;
+
 /* We care about the slope of the imbalance, not the direction. */
 if (dst_load < src_load)
 swap(dst_load, src_load);
 /* Is the difference below the threshold? */
-imb = dst_load * 100 - src_load * env->imbalance_pct;
+imb = dst_load * src_capacity * 100 -
+src_load * dst_capacity * env->imbalance_pct;
 if (imb <= 0)
 return false;
@@ -1115,10 +1126,14 @@ static bool load_too_imbalanced(long orig_src_load, long orig_dst_load,
 * The imbalance is above the allowed threshold.
 * Compare it with the old imbalance.
 */
+orig_src_load = env->src_stats.load;
+orig_dst_load = env->dst_stats.load;
+
 if (orig_dst_load < orig_src_load)
 swap(orig_dst_load, orig_src_load);
-old_imb = orig_dst_load * 100 - orig_src_load * env->imbalance_pct;
+old_imb = orig_dst_load * src_capacity * 100 -
+orig_src_load * dst_capacity * env->imbalance_pct;
 /* Would this change make things worse? */
 return (imb > old_imb);
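As a worked example of the capacity-scaled comparison above, the sketch below rehearses only the threshold half of the check (the comparison against the pre-move imbalance is omitted), with invented load and capacity values that are not from the patch:

/* Standalone rehearsal of the capacity-scaled imbalance threshold. */
#include <stdio.h>

static int above_threshold(long src_load, long dst_load,
			   long src_capacity, long dst_capacity,
			   long imbalance_pct)
{
	long imb;

	/* We care about the slope of the imbalance, not the direction. */
	if (dst_load < src_load) {
		long tmp = dst_load;
		dst_load = src_load;
		src_load = tmp;
	}

	/* dst_load/dst_capacity vs src_load/src_capacity, cross-multiplied */
	imb = dst_load * src_capacity * 100 -
	      src_load * dst_capacity * imbalance_pct;

	return imb > 0;
}

int main(void)
{
	/* example: big node (capacity 2048) vs small node (capacity 1024) */
	printf("%d\n", above_threshold(900, 600, 2048, 1024, 125));	/* 1: imbalanced */
	printf("%d\n", above_threshold(600, 550, 1024, 1024, 125));	/* 0: within 25%% slack */
	return 0;
}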
@@ -1136,10 +1151,10 @@ static void task_numa_compare(struct task_numa_env *env,
 struct rq *src_rq = cpu_rq(env->src_cpu);
 struct rq *dst_rq = cpu_rq(env->dst_cpu);
 struct task_struct *cur;
-long orig_src_load, src_load;
-long orig_dst_load, dst_load;
+long src_load, dst_load;
 long load;
-long imp = (groupimp > 0) ? groupimp : taskimp;
+long imp = env->p->numa_group ? groupimp : taskimp;
+long moveimp = imp;
 rcu_read_lock();
 cur = ACCESS_ONCE(dst_rq->curr);
@@ -1177,11 +1192,6 @@ static void task_numa_compare(struct task_numa_env *env,
 * itself (not part of a group), use the task weight
 * instead.
 */
-if (env->p->numa_group)
-imp = groupimp;
-else
-imp = taskimp;
-
 if (cur->numa_group)
 imp += group_weight(cur, env->src_nid) -
 group_weight(cur, env->dst_nid);
@@ -1191,7 +1201,7 @@ static void task_numa_compare(struct task_numa_env *env,
 }
 }
-if (imp < env->best_imp)
+if (imp <= env->best_imp && moveimp <= env->best_imp)
 goto unlock;
 if (!cur) {
@@ -1204,20 +1214,34 @@ static void task_numa_compare(struct task_numa_env *env,
 }
 /* Balance doesn't matter much if we're running a task per cpu */
-if (src_rq->nr_running == 1 && dst_rq->nr_running == 1)
+if (imp > env->best_imp && src_rq->nr_running == 1 &&
+dst_rq->nr_running == 1)
 goto assign;
 /*
 * In the overloaded case, try and keep the load balanced.
 */
 balance:
-orig_dst_load = env->dst_stats.load;
-orig_src_load = env->src_stats.load;
-
-/* XXX missing capacity terms */
 load = task_h_load(env->p);
-dst_load = orig_dst_load + load;
-src_load = orig_src_load - load;
+dst_load = env->dst_stats.load + load;
+src_load = env->src_stats.load - load;
+
+if (moveimp > imp && moveimp > env->best_imp) {
+/*
+ * If the improvement from just moving env->p direction is
+ * better than swapping tasks around, check if a move is
+ * possible. Store a slightly smaller score than moveimp,
+ * so an actually idle CPU will win.
+ */
+if (!load_too_imbalanced(src_load, dst_load, env)) {
+imp = moveimp - 1;
+cur = NULL;
+goto assign;
+}
+}
+
+if (imp <= env->best_imp)
+goto unlock;
+
 if (cur) {
 load = task_h_load(cur);
@@ -1225,8 +1249,7 @@ balance:
 src_load += load;
 }
-if (load_too_imbalanced(orig_src_load, orig_dst_load,
-src_load, dst_load, env))
+if (load_too_imbalanced(src_load, dst_load, env))
 goto unlock;
 assign:
@@ -1302,9 +1325,8 @@ static int task_numa_migrate(struct task_struct *p)
 groupimp = group_weight(p, env.dst_nid) - groupweight;
 update_numa_stats(&env.dst_stats, env.dst_nid);
-/* If the preferred nid has free capacity, try to use it. */
-if (env.dst_stats.has_free_capacity)
-task_numa_find_cpu(&env, taskimp, groupimp);
+/* Try to find a spot on the preferred nid. */
+task_numa_find_cpu(&env, taskimp, groupimp);
 /* No space available on the preferred nid. Look elsewhere. */
 if (env.best_cpu == -1) {
@@ -1324,10 +1346,6 @@ static int task_numa_migrate(struct task_struct *p)
 }
 }
-/* No better CPU than the current one was found. */
-if (env.best_cpu == -1)
-return -EAGAIN;
-
 /*
 * If the task is part of a workload that spans multiple NUMA nodes,
 * and is migrating into one of the workload's active nodes, remember
@@ -1336,8 +1354,19 @@ static int task_numa_migrate(struct task_struct *p)
 * A task that migrated to a second choice node will be better off
 * trying for a better one later. Do not set the preferred node here.
 */
-if (p->numa_group && node_isset(env.dst_nid, p->numa_group->active_nodes))
-sched_setnuma(p, env.dst_nid);
+if (p->numa_group) {
+if (env.best_cpu == -1)
+nid = env.src_nid;
+else
+nid = env.dst_nid;
+
+if (node_isset(nid, p->numa_group->active_nodes))
+sched_setnuma(p, env.dst_nid);
+}
+
+/* No better CPU than the current one was found. */
+if (env.best_cpu == -1)
+return -EAGAIN;
/* /*
* Reset the scan period if the task is being rescheduled on an * Reset the scan period if the task is being rescheduled on an
@@ -1415,12 +1444,12 @@ static void update_numa_active_node_mask(struct numa_group *numa_group)
 /*
 * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS
 * increments. The more local the fault statistics are, the higher the scan
-* period will be for the next scan window. If local/remote ratio is below
-* NUMA_PERIOD_THRESHOLD (where range of ratio is 1..NUMA_PERIOD_SLOTS) the
-* scan period will decrease
+* period will be for the next scan window. If local/(local+remote) ratio is
+* below NUMA_PERIOD_THRESHOLD (where range of ratio is 1..NUMA_PERIOD_SLOTS)
+* the scan period will decrease. Aim for 70% local accesses.
 */
 #define NUMA_PERIOD_SLOTS 10
-#define NUMA_PERIOD_THRESHOLD 3
+#define NUMA_PERIOD_THRESHOLD 7
 /*
 * Increase the scan period (slow down scanning) if the majority of
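A quick arithmetic check of the new threshold, using made-up fault counts (the full kernel logic also weighs shared versus private faults): the local/(local+remote) ratio is bucketed into NUMA_PERIOD_SLOTS slots, and only a ratio of at least 7 out of 10, roughly 70% local accesses, counts as "mostly local".

/* Slot arithmetic behind NUMA_PERIOD_THRESHOLD; sample fault counts only. */
#include <stdio.h>

#define NUMA_PERIOD_SLOTS 10
#define NUMA_PERIOD_THRESHOLD 7

int main(void)
{
	unsigned long local = 700, remote = 300;	/* example fault counts */
	int ratio = (int)((local * NUMA_PERIOD_SLOTS) / (local + remote));

	if (ratio >= NUMA_PERIOD_THRESHOLD)
		printf("ratio %d/10: mostly local, scan period may grow (scan less often)\n", ratio);
	else
		printf("ratio %d/10: too many remote faults, scan period shrinks (scan more often)\n", ratio);
	return 0;
}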
@@ -1595,30 +1624,17 @@ static void task_numa_placement(struct task_struct *p)
 if (p->numa_group) {
 update_numa_active_node_mask(p->numa_group);
-/*
- * If the preferred task and group nids are different,
- * iterate over the nodes again to find the best place.
- */
-if (max_nid != max_group_nid) {
-unsigned long weight, max_weight = 0;
-
-for_each_online_node(nid) {
-weight = task_weight(p, nid) + group_weight(p, nid);
-if (weight > max_weight) {
-max_weight = weight;
-max_nid = nid;
-}
-}
-}
-
 spin_unlock_irq(group_lock);
+max_nid = max_group_nid;
 }
-/* Preferred node as the node with the most faults */
-if (max_faults && max_nid != p->numa_preferred_nid) {
-/* Update the preferred nid and migrate task if possible */
+if (max_faults) {
+/* Set the new preferred node */
+if (max_nid != p->numa_preferred_nid)
 sched_setnuma(p, max_nid);
-numa_migrate_preferred(p);
+
+if (task_node(p) != p->numa_preferred_nid)
+numa_migrate_preferred(p);
 }
} }
@ -2899,7 +2915,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr); ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
if (delta_exec > ideal_runtime) { if (delta_exec > ideal_runtime) {
-resched_task(rq_of(cfs_rq)->curr);
+resched_curr(rq_of(cfs_rq));
/* /*
* The current task ran long enough, ensure it doesn't get * The current task ran long enough, ensure it doesn't get
* re-elected due to buddy favours. * re-elected due to buddy favours.
@ -2923,7 +2939,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return; return;
if (delta > ideal_runtime) if (delta > ideal_runtime)
-resched_task(rq_of(cfs_rq)->curr);
+resched_curr(rq_of(cfs_rq));
} }
static void static void
@ -3063,7 +3079,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
* validating it and just reschedule. * validating it and just reschedule.
*/ */
if (queued) { if (queued) {
-resched_task(rq_of(cfs_rq)->curr);
+resched_curr(rq_of(cfs_rq));
return; return;
} }
/* /*
@ -3254,7 +3270,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
* hierarchy can be throttled * hierarchy can be throttled
*/ */
if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
-resched_task(rq_of(cfs_rq)->curr);
+resched_curr(rq_of(cfs_rq));
} }
static __always_inline static __always_inline
@@ -3360,7 +3376,11 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 cfs_rq->throttled = 1;
 cfs_rq->throttled_clock = rq_clock(rq);
 raw_spin_lock(&cfs_b->lock);
-list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
+/*
+ * Add to the _head_ of the list, so that an already-started
+ * distribute_cfs_runtime will not see us
+ */
+list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
 if (!cfs_b->timer_active)
 __start_cfs_bandwidth(cfs_b, false);
 raw_spin_unlock(&cfs_b->lock);
@@ -3410,14 +3430,15 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 /* determine whether we need to wake up potentially idle cpu */
 if (rq->curr == rq->idle && rq->cfs.nr_running)
-resched_task(rq->curr);
+resched_curr(rq);
 }
 static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
 u64 remaining, u64 expires)
 {
 struct cfs_rq *cfs_rq;
-u64 runtime = remaining;
+u64 runtime;
+u64 starting_runtime = remaining;
 rcu_read_lock();
 list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
@@ -3448,7 +3469,7 @@ next:
 }
 rcu_read_unlock();
-return remaining;
+return starting_runtime - remaining;
 }
/* /*
@@ -3494,22 +3515,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 /* account preceding periods in which throttling occurred */
 cfs_b->nr_throttled += overrun;
-/*
- * There are throttled entities so we must first use the new bandwidth
- * to unthrottle them before making it generally available. This
- * ensures that all existing debts will be paid before a new cfs_rq is
- * allowed to run.
- */
-runtime = cfs_b->runtime;
 runtime_expires = cfs_b->runtime_expires;
-cfs_b->runtime = 0;
 /*
- * This check is repeated as we are holding onto the new bandwidth
- * while we unthrottle. This can potentially race with an unthrottled
- * group trying to acquire new bandwidth from the global pool.
+ * This check is repeated as we are holding onto the new bandwidth while
+ * we unthrottle. This can potentially race with an unthrottled group
+ * trying to acquire new bandwidth from the global pool. This can result
+ * in us over-using our runtime if it is all used during this loop, but
+ * only by limited amounts in that extreme case.
 */
-while (throttled && runtime > 0) {
+while (throttled && cfs_b->runtime > 0) {
+runtime = cfs_b->runtime;
 raw_spin_unlock(&cfs_b->lock);
 /* we can't nest cfs_b->lock while distributing bandwidth */
 runtime = distribute_cfs_runtime(cfs_b, runtime,
@@ -3517,10 +3533,10 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 raw_spin_lock(&cfs_b->lock);
 throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+
+cfs_b->runtime -= min(runtime, cfs_b->runtime);
 }
-
-/* return (any) remaining runtime */
-cfs_b->runtime = runtime;
/* /*
* While we are ensured activity in the period following an * While we are ensured activity in the period following an
* unthrottle, this also covers the case in which the new bandwidth is * unthrottle, this also covers the case in which the new bandwidth is
@@ -3631,10 +3647,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 return;
 }
-if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) {
+if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
 runtime = cfs_b->runtime;
-cfs_b->runtime = 0;
-}
+
 expires = cfs_b->runtime_expires;
 raw_spin_unlock(&cfs_b->lock);
@@ -3645,7 +3660,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 raw_spin_lock(&cfs_b->lock);
 if (expires == cfs_b->runtime_expires)
-cfs_b->runtime = runtime;
+cfs_b->runtime -= min(runtime, cfs_b->runtime);
 raw_spin_unlock(&cfs_b->lock);
 }
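Both hunks above replace "grab everything, write the remainder back" with an in-place cfs_b->runtime -= min(used, cfs_b->runtime) update. The value of the min() is that concurrent distribution can never drive the pool negative; a tiny standalone illustration with made-up numbers:

/* Saturating decrement pattern used for cfs_b->runtime above. */
#include <stdio.h>

typedef unsigned long long u64;

static u64 min_u64(u64 a, u64 b) { return a < b ? a : b; }

int main(void)
{
	u64 runtime_pool = 5000000;	/* ns left in the period, example value */
	u64 handed_out = 7000000;	/* amount reported as distributed */

	/* a naive "pool -= handed_out" would wrap the unsigned value */
	runtime_pool -= min_u64(handed_out, runtime_pool);

	printf("pool now %llu\n", runtime_pool);	/* clamps at 0 */
	return 0;
}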
@@ -3775,6 +3790,19 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 hrtimer_cancel(&cfs_b->slack_timer);
 }
+static void __maybe_unused update_runtime_enabled(struct rq *rq)
+{
+struct cfs_rq *cfs_rq;
+
+for_each_leaf_cfs_rq(rq, cfs_rq) {
+struct cfs_bandwidth *cfs_b = &cfs_rq->tg->cfs_bandwidth;
+
+raw_spin_lock(&cfs_b->lock);
+cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
+raw_spin_unlock(&cfs_b->lock);
+}
+}
+
 static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 struct cfs_rq *cfs_rq;
@@ -3788,6 +3816,12 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 * there's some valid quota amount
 */
 cfs_rq->runtime_remaining = 1;
+
+/*
+ * Offline rq is schedulable till cpu is completely disabled
+ * in take_cpu_down(), so we prevent new cfs throttling here.
+ */
+cfs_rq->runtime_enabled = 0;
+
 if (cfs_rq_throttled(cfs_rq))
 unthrottle_cfs_rq(cfs_rq);
 }
@@ -3831,6 +3865,7 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 return NULL;
 }
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+static inline void update_runtime_enabled(struct rq *rq) {}
 static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 #endif /* CONFIG_CFS_BANDWIDTH */
@ -3854,7 +3889,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
if (delta < 0) { if (delta < 0) {
if (rq->curr == p) if (rq->curr == p)
-resched_task(p);
+resched_curr(rq);
return; return;
} }
@ -4723,7 +4758,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return; return;
preempt: preempt:
-resched_task(curr);
+resched_curr(rq);
/* /*
* Only set the backward buddy when the current task is still * Only set the backward buddy when the current task is still
* on the rq. This can happen when a wakeup gets interleaved * on the rq. This can happen when a wakeup gets interleaved
@@ -5094,8 +5129,7 @@ static void move_task(struct task_struct *p, struct lb_env *env)
 /*
 * Is this task likely cache-hot:
 */
-static int
-task_hot(struct task_struct *p, u64 now)
+static int task_hot(struct task_struct *p, struct lb_env *env)
 {
 s64 delta;
@@ -5108,7 +5142,7 @@ task_hot(struct task_struct *p, u64 now)
 /*
 * Buddy candidates are cache hot:
 */
-if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
+if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running &&
 (&p->se == cfs_rq_of(&p->se)->next ||
 &p->se == cfs_rq_of(&p->se)->last))
 return 1;
@@ -5118,7 +5152,7 @@ task_hot(struct task_struct *p, u64 now)
 if (sysctl_sched_migration_cost == 0)
 return 0;
-delta = now - p->se.exec_start;
+delta = rq_clock_task(env->src_rq) - p->se.exec_start;
 return delta < (s64)sysctl_sched_migration_cost;
 }
@@ -5272,7 +5306,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 * 2) task is cache cold, or
 * 3) too many balance attempts have failed.
 */
-tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq));
+tsk_cache_hot = task_hot(p, env);
 if (!tsk_cache_hot)
 tsk_cache_hot = migrate_degrades_locality(p, env);
@@ -5864,10 +5898,12 @@ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *gro
 * @load_idx: Load index of sched_domain of this_cpu for load calc.
 * @local_group: Does group contain this_cpu.
 * @sgs: variable to hold the statistics for this group.
+ * @overload: Indicate more than one runnable task for any CPU.
 */
 static inline void update_sg_lb_stats(struct lb_env *env,
 struct sched_group *group, int load_idx,
-int local_group, struct sg_lb_stats *sgs)
+int local_group, struct sg_lb_stats *sgs,
+bool *overload)
 {
 unsigned long load;
 int i;
@@ -5885,6 +5921,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 sgs->group_load += load;
 sgs->sum_nr_running += rq->nr_running;
+
+if (rq->nr_running > 1)
+*overload = true;
+
 #ifdef CONFIG_NUMA_BALANCING
 sgs->nr_numa_running += rq->nr_numa_running;
 sgs->nr_preferred_running += rq->nr_preferred_running;
@@ -5995,6 +6035,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 struct sched_group *sg = env->sd->groups;
 struct sg_lb_stats tmp_sgs;
 int load_idx, prefer_sibling = 0;
+bool overload = false;
 if (child && child->flags & SD_PREFER_SIBLING)
 prefer_sibling = 1;
@@ -6015,7 +6056,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 update_group_capacity(env->sd, env->dst_cpu);
 }
-update_sg_lb_stats(env, sg, load_idx, local_group, sgs);
+update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
+&overload);
 if (local_group)
 goto next_group;
@@ -6049,6 +6091,13 @@ next_group:
 if (env->sd->flags & SD_NUMA)
 env->fbq_type = fbq_classify_group(&sds->busiest_stat);
+
+if (!env->sd->parent) {
+/* update overload indicator if we are at root domain */
+if (env->dst_rq->rd->overload != overload)
+env->dst_rq->rd->overload = overload;
+}
+
 }
 /**
@@ -6767,7 +6816,8 @@ static int idle_balance(struct rq *this_rq)
 */
 this_rq->idle_stamp = rq_clock(this_rq);
-if (this_rq->avg_idle < sysctl_sched_migration_cost) {
+if (this_rq->avg_idle < sysctl_sched_migration_cost ||
+!this_rq->rd->overload) {
 rcu_read_lock();
 sd = rcu_dereference_check_sched_domain(this_rq->sd);
 if (sd)
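The overload flag written at the root domain above is what lets idle_balance() bail out early when the system is only partially loaded. A compressed, userspace-only model of the two sides of that handshake (field names and counts are simplified stand-ins, not kernel data):

/* Toy model of the rd->overload fast-idle path: the stats pass sets the
 * flag when any runqueue has more than one task; idle_balance() then
 * skips the expensive search when nothing is overloaded. */
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

static int nr_running[NCPUS] = { 1, 0, 1, 1 };	/* example per-CPU counts */
static bool rd_overload;

static void update_sd_lb_stats_model(void)
{
	bool overload = false;
	int i;

	for (i = 0; i < NCPUS; i++)
		if (nr_running[i] > 1)
			overload = true;

	rd_overload = overload;	/* only the root domain records this */
}

static bool idle_balance_should_search(unsigned long long avg_idle_ns,
				       unsigned long long migration_cost_ns)
{
	/* mirrors: if (avg_idle < cost || !rd->overload) skip balancing */
	return avg_idle_ns >= migration_cost_ns && rd_overload;
}

int main(void)
{
	update_sd_lb_stats_model();
	printf("search=%d\n", idle_balance_should_search(1000000, 500000));	/* 0 */

	nr_running[2] = 3;	/* one CPU becomes overloaded */
	update_sd_lb_stats_model();
	printf("search=%d\n", idle_balance_should_search(1000000, 500000));	/* 1 */
	return 0;
}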
@@ -7325,6 +7375,8 @@ void trigger_load_balance(struct rq *rq)
 static void rq_online_fair(struct rq *rq)
 {
 update_sysctl();
+
+update_runtime_enabled(rq);
 }
static void rq_offline_fair(struct rq *rq) static void rq_offline_fair(struct rq *rq)
@ -7398,7 +7450,7 @@ static void task_fork_fair(struct task_struct *p)
* 'current' within the tree based on its new key value. * 'current' within the tree based on its new key value.
*/ */
swap(curr->vruntime, se->vruntime); swap(curr->vruntime, se->vruntime);
-resched_task(rq->curr);
+resched_curr(rq);
} }
se->vruntime -= cfs_rq->min_vruntime; se->vruntime -= cfs_rq->min_vruntime;
@ -7423,7 +7475,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
*/ */
if (rq->curr == p) { if (rq->curr == p) {
if (p->prio > oldprio) if (p->prio > oldprio)
-resched_task(rq->curr);
+resched_curr(rq);
} else } else
check_preempt_curr(rq, p, 0); check_preempt_curr(rq, p, 0);
} }
@ -7486,7 +7538,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
* if we can still preempt the current task. * if we can still preempt the current task.
*/ */
if (rq->curr == p) if (rq->curr == p)
-resched_task(rq->curr);
+resched_curr(rq);
else else
check_preempt_curr(rq, p, 0); check_preempt_curr(rq, p, 0);
} }


@@ -79,7 +79,7 @@ static void cpuidle_idle_call(void)
 struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
 struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 int next_state, entered_state;
-bool broadcast;
+unsigned int broadcast;
 /*
 * Check if the idle task must be rescheduled. If it is the
@@ -135,7 +135,7 @@ use_default:
 goto exit_idle;
 }
-broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP);
+broadcast = drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP;
 /*
 * Tell the time framework to switch to a broadcast timer


@@ -20,7 +20,7 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
 */
 static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
 {
-resched_task(rq->idle);
+resched_curr(rq);
 }
static struct task_struct * static struct task_struct *


@@ -463,9 +463,10 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
+struct rq *rq = rq_of_rt_rq(rt_rq);
 struct sched_rt_entity *rt_se;
-int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+int cpu = cpu_of(rq);
 rt_se = rt_rq->tg->rt_se[cpu];
@@ -476,7 +477,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 enqueue_rt_entity(rt_se, false);
 if (rt_rq->highest_prio.curr < curr->prio)
-resched_task(curr);
+resched_curr(rq);
 }
} }
@@ -566,7 +567,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 return;
 enqueue_top_rt_rq(rt_rq);
-resched_task(rq->curr);
+resched_curr(rq);
 }
 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
@@ -740,6 +741,9 @@ balanced:
 rt_rq->rt_throttled = 0;
 raw_spin_unlock(&rt_rq->rt_runtime_lock);
 raw_spin_unlock(&rt_b->rt_runtime_lock);
+
+/* Make rt_rq available for pick_next_task() */
+sched_rt_rq_enqueue(rt_rq);
 }
 }
@ -948,7 +952,7 @@ static void update_curr_rt(struct rq *rq)
raw_spin_lock(&rt_rq->rt_runtime_lock); raw_spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_time += delta_exec; rt_rq->rt_time += delta_exec;
if (sched_rt_runtime_exceeded(rt_rq)) if (sched_rt_runtime_exceeded(rt_rq))
-resched_task(curr);
+resched_curr(rq);
raw_spin_unlock(&rt_rq->rt_runtime_lock); raw_spin_unlock(&rt_rq->rt_runtime_lock);
} }
} }
@ -1363,7 +1367,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
* to try and push current away: * to try and push current away:
*/ */
requeue_task_rt(rq, p, 1); requeue_task_rt(rq, p, 1);
-resched_task(rq->curr);
+resched_curr(rq);
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
@ -1374,7 +1378,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{ {
if (p->prio < rq->curr->prio) { if (p->prio < rq->curr->prio) {
-resched_task(rq->curr);
+resched_curr(rq);
return; return;
} }
@ -1690,7 +1694,7 @@ retry:
* just reschedule current. * just reschedule current.
*/ */
if (unlikely(next_task->prio < rq->curr->prio)) { if (unlikely(next_task->prio < rq->curr->prio)) {
-resched_task(rq->curr);
+resched_curr(rq);
return 0; return 0;
} }
@ -1737,7 +1741,7 @@ retry:
activate_task(lowest_rq, next_task, 0); activate_task(lowest_rq, next_task, 0);
ret = 1; ret = 1;
-resched_task(lowest_rq->curr);
+resched_curr(lowest_rq);
double_unlock_balance(rq, lowest_rq); double_unlock_balance(rq, lowest_rq);
@ -1936,7 +1940,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
return; return;
if (pull_rt_task(rq)) if (pull_rt_task(rq))
-resched_task(rq->curr);
+resched_curr(rq);
} }
void __init init_sched_rt_class(void) void __init init_sched_rt_class(void)
@ -1974,7 +1978,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
check_resched = 0; check_resched = 0;
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
if (check_resched && p->prio < rq->curr->prio) if (check_resched && p->prio < rq->curr->prio)
-resched_task(rq->curr);
+resched_curr(rq);
} }
} }
@ -2003,11 +2007,11 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
* Only reschedule if p is still on the same runqueue. * Only reschedule if p is still on the same runqueue.
*/ */
if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
-resched_task(p);
+resched_curr(rq);
#else #else
/* For UP simply resched on drop of prio */ /* For UP simply resched on drop of prio */
if (oldprio < p->prio) if (oldprio < p->prio)
-resched_task(p);
+resched_curr(rq);
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
} else { } else {
/* /*
@ -2016,7 +2020,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
* then reschedule. * then reschedule.
*/ */
if (p->prio < rq->curr->prio) if (p->prio < rq->curr->prio)
-resched_task(rq->curr);
+resched_curr(rq);
} }
} }


@@ -477,6 +477,9 @@ struct root_domain {
 cpumask_var_t span;
 cpumask_var_t online;
+/* Indicate more than one runnable task for any CPU */
+bool overload;
+
 /*
 * The bit corresponding to a CPU gets set here if such CPU has more
 * than one runnable -deadline task (as it is below for RT tasks).
@@ -884,20 +887,10 @@ enum {
 #undef SCHED_FEAT
 #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
-static __always_inline bool static_branch__true(struct static_key *key)
-{
-return static_key_true(key); /* Not out of line branch. */
-}
-
-static __always_inline bool static_branch__false(struct static_key *key)
-{
-return static_key_false(key); /* Out of line branch. */
-}
-
 #define SCHED_FEAT(name, enabled) \
 static __always_inline bool static_branch_##name(struct static_key *key) \
 { \
-return static_branch__##enabled(key); \
+return static_key_##enabled(key); \
 }
 #include "features.h"
@@ -1196,7 +1189,7 @@ extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 extern void init_sched_dl_class(void);
-extern void resched_task(struct task_struct *p);
+extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
 extern struct rt_bandwidth def_rt_bandwidth;
@@ -1218,15 +1211,26 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 rq->nr_running = prev_nr + count;
-#ifdef CONFIG_NO_HZ_FULL
 if (prev_nr < 2 && rq->nr_running >= 2) {
-if (tick_nohz_full_cpu(rq->cpu)) {
-/* Order rq->nr_running write against the IPI */
-smp_wmb();
-smp_send_reschedule(rq->cpu);
-}
-}
+#ifdef CONFIG_SMP
+if (!rq->rd->overload)
+rq->rd->overload = true;
 #endif
+
+#ifdef CONFIG_NO_HZ_FULL
+if (tick_nohz_full_cpu(rq->cpu)) {
+/*
+ * Tick is needed if more than one task runs on a CPU.
+ * Send the target an IPI to kick it out of nohz mode.
+ *
+ * We assume that IPI implies full memory barrier and the
+ * new value of rq->nr_running is visible on reception
+ * from the target.
+ */
+tick_nohz_full_kick_cpu(rq->cpu);
+}
+#endif
+}
 }
 static inline void sub_nr_running(struct rq *rq, unsigned count)


@@ -319,14 +319,14 @@ EXPORT_SYMBOL(wake_bit_function);
 */
 int __sched
 __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
-int (*action)(void *), unsigned mode)
+wait_bit_action_f *action, unsigned mode)
 {
 int ret = 0;
 do {
 prepare_to_wait(wq, &q->wait, mode);
 if (test_bit(q->key.bit_nr, q->key.flags))
-ret = (*action)(q->key.flags);
+ret = (*action)(&q->key);
 } while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
 finish_wait(wq, &q->wait);
 return ret;
@@ -334,7 +334,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
 EXPORT_SYMBOL(__wait_on_bit);
 int __sched out_of_line_wait_on_bit(void *word, int bit,
-int (*action)(void *), unsigned mode)
+wait_bit_action_f *action, unsigned mode)
 {
 wait_queue_head_t *wq = bit_waitqueue(word, bit);
 DEFINE_WAIT_BIT(wait, word, bit);
@@ -345,7 +345,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit);
 int __sched
 __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
-int (*action)(void *), unsigned mode)
+wait_bit_action_f *action, unsigned mode)
 {
 do {
 int ret;
@@ -353,7 +353,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
 prepare_to_wait_exclusive(wq, &q->wait, mode);
 if (!test_bit(q->key.bit_nr, q->key.flags))
 continue;
-ret = action(q->key.flags);
+ret = action(&q->key);
 if (!ret)
 continue;
 abort_exclusive_wait(wq, &q->wait, mode, &q->key);
@@ -365,7 +365,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
 EXPORT_SYMBOL(__wait_on_bit_lock);
 int __sched out_of_line_wait_on_bit_lock(void *word, int bit,
-int (*action)(void *), unsigned mode)
+wait_bit_action_f *action, unsigned mode)
 {
 wait_queue_head_t *wq = bit_waitqueue(word, bit);
 DEFINE_WAIT_BIT(wait, word, bit);
@@ -502,3 +502,21 @@ void wake_up_atomic_t(atomic_t *p)
 __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR);
 }
 EXPORT_SYMBOL(wake_up_atomic_t);
+
+__sched int bit_wait(struct wait_bit_key *word)
+{
+if (signal_pending_state(current->state, current))
+return 1;
+schedule();
+return 0;
+}
+EXPORT_SYMBOL(bit_wait);
+
+__sched int bit_wait_io(struct wait_bit_key *word)
+{
+if (signal_pending_state(current->state, current))
+return 1;
+io_schedule();
+return 0;
+}
+EXPORT_SYMBOL(bit_wait_io);
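To make the call-signature change concrete, here is a small userspace mock of the old versus new wait_on_bit() action contract. The wait_bit_key struct, the typedef and the fake waiter below are simplified stand-ins, not the kernel API; the observable difference is that actions now receive a struct wait_bit_key * instead of a bare void * word, and trivial per-caller sleep functions are replaced by the shared defaults (bit_wait, bit_wait_io) added above.

/* Mock of the action-function signature change:
 *   old: int (*action)(void *word)
 *   new: wait_bit_action_f, taking a struct wait_bit_key *.
 * Everything here is a simplified stand-in, not the kernel API. */
#include <stdio.h>

struct wait_bit_key {
	void *flags;
	int bit_nr;
};

typedef int (wait_bit_action_f)(struct wait_bit_key *key);

/* the default sleep behaviour now lives in one shared helper */
static int bit_wait(struct wait_bit_key *key)
{
	(void)key;		/* the key carries word + bit if an action needs them */
	/* a real implementation would call schedule() here */
	return 0;
}

static int mock_wait_on_bit(unsigned long *word, int bit,
			    wait_bit_action_f *action)
{
	struct wait_bit_key key = { .flags = word, .bit_nr = bit };

	while (*word & (1UL << bit)) {
		int ret = action(&key);
		if (ret)
			return ret;
		*word &= ~(1UL << bit);	/* pretend another thread cleared the bit */
	}
	return 0;
}

int main(void)
{
	unsigned long flags = 1UL << 3;

	/* callers no longer supply their own trivial sleep function */
	printf("ret=%d flags=0x%lx\n", mock_wait_on_bit(&flags, 3, bit_wait), flags);
	return 0;
}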


@@ -3,6 +3,7 @@
 *
 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
 */
+#include <linux/irq_work.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/kernel.h>
@@ -251,6 +252,14 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 csd->func(csd->info);
 csd_unlock(csd);
 }
+
+/*
+ * Handle irq works queued remotely by irq_work_queue_on().
+ * Smp functions above are typically synchronous so they
+ * better run first since some other CPUs may be busy waiting
+ * for them.
+ */
+irq_work_run();
 }
/* /*


@@ -225,13 +225,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 };
 /*
- * Kick the current CPU if it's full dynticks in order to force it to
+ * Kick the CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 */
-void tick_nohz_full_kick(void)
+void tick_nohz_full_kick_cpu(int cpu)
 {
-if (tick_nohz_full_cpu(smp_processor_id()))
-irq_work_queue(&__get_cpu_var(nohz_full_kick_work));
+if (!tick_nohz_full_cpu(cpu))
+return;
+
+irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 static void nohz_full_kick_ipi(void *info)


@@ -241,18 +241,6 @@ void delete_from_page_cache(struct page *page)
 }
 EXPORT_SYMBOL(delete_from_page_cache);
-static int sleep_on_page(void *word)
-{
-io_schedule();
-return 0;
-}
-
-static int sleep_on_page_killable(void *word)
-{
-sleep_on_page(word);
-return fatal_signal_pending(current) ? -EINTR : 0;
-}
-
 static int filemap_check_errors(struct address_space *mapping)
 {
 int ret = 0;
@@ -692,7 +680,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 if (test_bit(bit_nr, &page->flags))
-__wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
+__wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
 TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
@@ -705,7 +693,7 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 return 0;
 return __wait_on_bit(page_waitqueue(page), &wait,
-sleep_on_page_killable, TASK_KILLABLE);
+bit_wait_io, TASK_KILLABLE);
 }
 /**
@@ -806,7 +794,7 @@ void __lock_page(struct page *page)
 {
 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-__wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
+__wait_on_bit_lock(page_waitqueue(page), &wait, bit_wait_io,
 TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
@@ -816,7 +804,7 @@ int __lock_page_killable(struct page *page)
 DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 return __wait_on_bit_lock(page_waitqueue(page), &wait,
-sleep_on_page_killable, TASK_KILLABLE);
+bit_wait_io, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);


@@ -1978,18 +1978,12 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 #endif /* CONFIG_MIGRATION */
 #ifdef CONFIG_MEMORY_HOTREMOVE
-static int just_wait(void *word)
-{
-schedule();
-return 0;
-}
-
 static void wait_while_offlining(void)
 {
 while (ksm_run & KSM_RUN_OFFLINE) {
 mutex_unlock(&ksm_thread_mutex);
 wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
-just_wait, TASK_UNINTERRUPTIBLE);
+TASK_UNINTERRUPTIBLE);
 mutex_lock(&ksm_thread_mutex);
 }
 }


@@ -2186,12 +2186,6 @@ static void hci_inq_req(struct hci_request *req, unsigned long opt)
 hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
 }
-static int wait_inquiry(void *word)
-{
-schedule();
-return signal_pending(current);
-}
-
 int hci_inquiry(void __user *arg)
 {
 __u8 __user *ptr = arg;
@@ -2242,7 +2236,7 @@ int hci_inquiry(void __user *arg)
 /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is
 * cleared). If it is interrupted by a signal, return -EINTR.
 */
-if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry,
+if (wait_on_bit(&hdev->flags, HCI_INQUIRY,
 TASK_INTERRUPTIBLE))
 return -EINTR;
 }


@@ -250,7 +250,7 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
 }
 EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
-static int rpc_wait_bit_killable(void *word)
+static int rpc_wait_bit_killable(struct wait_bit_key *key)
 {
 if (fatal_signal_pending(current))
 return -ERESTARTSYS;
@@ -309,7 +309,7 @@ static int rpc_complete_task(struct rpc_task *task)
 * to enforce taking of the wq->lock and hence avoid races with
 * rpc_complete_task().
 */
-int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
+int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
 {
 if (action == NULL)
 action = rpc_wait_bit_killable;


@@ -91,15 +91,6 @@ static void key_gc_timer_func(unsigned long data)
 key_schedule_gc_links();
 }
-/*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-static int key_gc_wait_bit(void *flags)
-{
-schedule();
-return 0;
-}
-
 /*
 * Reap keys of dead type.
 *
@@ -123,7 +114,7 @@ void key_gc_keytype(struct key_type *ktype)
 schedule_work(&key_gc_work);
 kdebug("sleep");
-wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE, key_gc_wait_bit,
+wait_on_bit(&key_gc_flags, KEY_GC_REAPING_KEYTYPE,
 TASK_UNINTERRUPTIBLE);
 key_gc_dead_keytype = NULL;


@@ -21,24 +21,6 @@
 #define key_negative_timeout 60 /* default timeout on a negative key's existence */
-/*
- * wait_on_bit() sleep function for uninterruptible waiting
- */
-static int key_wait_bit(void *flags)
-{
-schedule();
-return 0;
-}
-
-/*
- * wait_on_bit() sleep function for interruptible waiting
- */
-static int key_wait_bit_intr(void *flags)
-{
-schedule();
-return signal_pending(current) ? -ERESTARTSYS : 0;
-}
-
 /**
 * complete_request_key - Complete the construction of a key.
 * @cons: The key construction record.
@@ -592,10 +574,9 @@ int wait_for_key_construction(struct key *key, bool intr)
 int ret;
 ret = wait_on_bit(&key->flags, KEY_FLAG_USER_CONSTRUCT,
-intr ? key_wait_bit_intr : key_wait_bit,
 intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
-if (ret < 0)
-return ret;
+if (ret)
+return -ERESTARTSYS;
 if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
 smp_rmb();
 return key->type_data.reject_error;