arch/tile: fix __ndelay etc to work better
The current implementations of __ndelay and __udelay call a hypervisor service to delay, but the hypervisor service isn't actually implemented very well, and the consensus is that Linux should handle figuring this out natively and not use a hypervisor service. By converting nanoseconds to cycles, and then spinning until the cycle counter reaches the desired cycle, we get several benefits: first, we are sensitive to the actual clock speed; second, we use less power by issuing a slow SPR read once every six cycles while we delay; and third, we properly handle the case of an interrupt by exiting at the target time rather than after some number of cycles. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
This commit is contained in:
parent
04f7a3f12e
commit
1337173148
|
@ -38,6 +38,9 @@ static inline cycles_t get_cycles(void)
|
|||
|
||||
cycles_t get_clock_rate(void);
|
||||
|
||||
/*
 * Convert nanoseconds to core clock cycles.
 *
 * The conversion scales nsecs by the mult/shift factors of the
 * per-cpu tile timer clock event device.
 */
|
||||
cycles_t ns2cycles(unsigned long nsecs);
|
||||
|
||||
/* Called at cpu initialization to set some low-level constants. */
|
||||
void setup_clock(void);
|
||||
|
||||
|
|
|
@ -963,6 +963,11 @@ HV_ASIDRange hv_inquire_asid(int idx);
|
|||
|
||||
|
||||
/** Waits for at least the specified number of nanoseconds then returns.
|
||||
*
|
||||
* NOTE: this deprecated function currently assumes a 750 MHz clock,
|
||||
* and is thus not generally suitable for use. New code should call
|
||||
* hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for,
|
||||
* and delay by looping while checking the cycle counter SPR.
|
||||
*
|
||||
* @param nanosecs The number of nanoseconds to sleep.
|
||||
*/
|
||||
|
|
|
@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve)
|
|||
jrp lr
|
||||
STD_ENDPROC(kernel_execve)
|
||||
|
||||
/*
 * Delay a fixed number of cycles.
 *
 * Presumably the cycle count is passed in r0 (TODO confirm against the
 * calling convention).  A single bundle decrements r0 and branches back
 * to itself ("." is the current location) while the tested value is
 * non-zero; once the loop falls through, control returns through lr.
 */
|
||||
STD_ENTRY(__delay)
|
||||
{ addi r0, r0, -1; bnzt r0, . }
|
||||
jrp lr
|
||||
STD_ENDPROC(__delay)
|
||||
|
||||
/*
|
||||
* We don't run this function directly, but instead copy it to a page
|
||||
* we map into every user process. See vdso_setup().
|
||||
|
|
|
@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier)
|
|||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the tile timer to convert nsecs to core clock cycles, relying
|
||||
* on it having the same frequency as SPR_CYCLE.
|
||||
*/
|
||||
cycles_t ns2cycles(unsigned long nsecs)
|
||||
{
|
||||
struct clock_event_device *dev = &__get_cpu_var(tile_timer);
|
||||
return ((u64)nsecs * dev->mult) >> dev->shift;
|
||||
}
|
||||
|
|
|
@ -15,20 +15,31 @@
|
|||
#include <linux/module.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/thread_info.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <hv/hypervisor.h>
|
||||
#include <asm/timex.h>
|
||||
|
||||
void __udelay(unsigned long usecs)
|
||||
{
|
||||
hv_nanosleep(usecs * 1000);
|
||||
if (usecs > ULONG_MAX / 1000) {
|
||||
WARN_ON_ONCE(usecs > ULONG_MAX / 1000);
|
||||
usecs = ULONG_MAX / 1000;
|
||||
}
|
||||
__ndelay(usecs * 1000);
|
||||
}
|
||||
EXPORT_SYMBOL(__udelay);
|
||||
|
||||
void __ndelay(unsigned long nsecs)
|
||||
{
|
||||
hv_nanosleep(nsecs);
|
||||
cycles_t target = get_cycles();
|
||||
target += ns2cycles(nsecs);
|
||||
while (get_cycles() < target)
|
||||
cpu_relax();
|
||||
}
|
||||
EXPORT_SYMBOL(__ndelay);
|
||||
|
||||
/* FIXME: should be declared in a header somewhere. */
|
||||
void __delay(unsigned long cycles)
|
||||
{
|
||||
cycles_t target = get_cycles() + cycles;
|
||||
while (get_cycles() < target)
|
||||
cpu_relax();
|
||||
}
|
||||
EXPORT_SYMBOL(__delay);
|
||||
|
|
Loading…
Reference in New Issue