From 62f0f61e6673e67151a7c8c0f9a09c7ea43fe2b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Dec 2007 19:16:17 +0100 Subject: [PATCH 1/3] hrtimers: avoid overflow for large relative timeouts Relative hrtimers with a large timeout value might end up as negative timer values, when the current time is added in hrtimer_start(). This in turn is causing the clockevents_set_next() function to set an huge timeout and sleep for quite a long time when we have a clock source which is capable of long sleeps like HPET. With PIT this almost goes unnoticed as the maximum delta is ~27ms. The non-hrt/nohz code sorts this out in the next timer interrupt, so we never noticed that problem which has been there since the first day of hrtimers. This bug became more apparent in 2.6.24 which activates HPET on more hardware. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/hrtimer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 22a25142e4cf..e65dd0b47cdc 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -850,6 +850,14 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) #ifdef CONFIG_TIME_LOW_RES tim = ktime_add(tim, base->resolution); #endif + /* + * Careful here: User space might have asked for a + * very long sleep, so the add above might result in a + * negative number, which enqueues the timer in front + * of the queue. + */ + if (tim.tv64 < 0) + tim.tv64 = KTIME_MAX; } timer->expires = tim; From 167b1de3ee4e50d65a2bd0a2667c9cd48faf54f3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Dec 2007 19:16:17 +0100 Subject: [PATCH 2/3] clockevents: warn once when program_event() is called with negative expiry The hrtimer problem with large relative timeouts resulting in a negative expiry time went unnoticed as there is no check in the clockevents_program_event() code. Put a check there with a WARN_ONCE to avoid such problems in the future. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 822beebe664a..5fb139fef9fa 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -78,6 +78,11 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, unsigned long long clc; int64_t delta; + if (unlikely(expires.tv64 < 0)) { + WARN_ON_ONCE(1); + return -ETIME; + } + delta = ktime_to_ns(ktime_sub(expires, now)); if (delta <= 0) From e17bcb43a26a7111f851b5ff6d1258ecd355de75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Dec 2007 19:16:17 +0100 Subject: [PATCH 3/3] ACPI: move timer broadcast before busmaster disable The timer broadcast code might access HPET, which should not be accessed after the busmaster disable. In acpi_idle_enter_simple() this change also prevents, that we modify the busmaster state without going actually idle. This might leave the ACPI bm state in a stale state, when we leave the function early in the need_resched() check. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Acked-by: Venkatesh Pallipadi --- drivers/acpi/processor_idle.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index b1fbee3f7fe1..2fe34cc73c13 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -530,6 +530,11 @@ static void acpi_processor_idle(void) break; case ACPI_STATE_C3: + /* + * Must be done before busmaster disable as we might + * need to access HPET ! + */ + acpi_state_timer_broadcast(pr, cx, 1); /* * disable bus master * bm_check implies we need ARB_DIS @@ -557,7 +562,6 @@ static void acpi_processor_idle(void) /* Get start time (ticks) */ t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); /* Invoke C3 */ - acpi_state_timer_broadcast(pr, cx, 1); /* Tell the scheduler that we are going deep-idle: */ sched_clock_idle_sleep_event(); acpi_cstate_enter(cx); @@ -1401,9 +1405,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, if (acpi_idle_suspend) return(acpi_idle_enter_c1(dev, state)); - if (pr->flags.bm_check) - acpi_idle_update_bm_rld(pr, cx); - local_irq_disable(); current_thread_info()->status &= ~TS_POLLING; /* @@ -1418,13 +1419,21 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, return 0; } + /* + * Must be done before busmaster disable as we might need to + * access HPET ! + */ + acpi_state_timer_broadcast(pr, cx, 1); + + if (pr->flags.bm_check) + acpi_idle_update_bm_rld(pr, cx); + if (cx->type == ACPI_STATE_C3) ACPI_FLUSH_CPU_CACHE(); t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); /* Tell the scheduler that we are going deep-idle: */ sched_clock_idle_sleep_event(); - acpi_state_timer_broadcast(pr, cx, 1); acpi_idle_do_entry(cx); t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);