arch/tile: fix __ndelay etc to work better

The current implementations of __ndelay and __udelay call a hypervisor
service to delay, but that service is not implemented particularly well,
and the consensus is that Linux should handle delays natively rather
than rely on a hypervisor service.

By converting nanoseconds to cycles, and then spinning until the
cycle counter reaches the desired cycle, we get several benefits:
first, we are sensitive to the actual clock speed; second, we use
less power by issuing a slow SPR read once every six cycles while
we delay; and third, we properly handle the case of an interrupt by
exiting at the target time rather than after some number of cycles.
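
In outline, the new approach amounts to the following condensed C sketch of the logic in the diff below (the function name is illustrative; the real code scales nanoseconds via the tile timer's mult/shift factors rather than dividing by the clock rate directly):

	void ndelay_sketch(unsigned long nsecs)
	{
		cycles_t target = get_cycles() + ns2cycles(nsecs);

		/*
		 * Compare against an absolute target cycle: time spent in an
		 * interrupt handler still counts toward the delay, so we exit
		 * near the target time.  cpu_relax() issues the slow SPR read
		 * mentioned above while we spin.
		 */
		while (get_cycles() < target)
			cpu_relax();
	}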

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Author: Chris Metcalf <cmetcalf@tilera.com>
Date:   2011-02-28 13:21:52 -05:00
Commit: 1337173148 (parent 04f7a3f12e)

5 changed files with 34 additions and 11 deletions


@@ -38,6 +38,9 @@ static inline cycles_t get_cycles(void)
 
 cycles_t get_clock_rate(void);
 
+/* Convert nanoseconds to core clock cycles. */
+cycles_t ns2cycles(unsigned long nsecs);
+
 /* Called at cpu initialization to set some low-level constants. */
 void setup_clock(void);


@@ -963,6 +963,11 @@ HV_ASIDRange hv_inquire_asid(int idx);
 
 /** Waits for at least the specified number of nanoseconds then returns.
  *
+ * NOTE: this deprecated function currently assumes a 750 MHz clock,
+ * and is thus not generally suitable for use.  New code should call
+ * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for,
+ * and delay by looping while checking the cycle counter SPR.
+ *
  * @param nanosecs The number of nanoseconds to sleep.
  */
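
For reference, the guidance above boils down to something like the following sketch (the function name is hypothetical, get_cycles() stands in for reading the cycle counter SPR, and rounding/overflow handling is omitted):

	void delay_nsecs_sketch(unsigned long nsecs)
	{
		long cycles_per_sec = hv_sysconf(HV_SYSCONF_CPU_SPEED);
		cycles_t target = get_cycles() +
			(cycles_t)(((u64)nsecs * cycles_per_sec) / 1000000000UL);

		while (get_cycles() < target)
			cpu_relax();	/* keep polling the cycle counter */
	}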


@@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve)
 	jrp lr
 	STD_ENDPROC(kernel_execve)
 
-/* Delay a fixed number of cycles. */
-STD_ENTRY(__delay)
-	{ addi r0, r0, -1; bnzt r0, . }
-	jrp lr
-	STD_ENDPROC(__delay)
-
 /*
  * We don't run this function directly, but instead copy it to a page
  * we map into every user process.  See vdso_setup().


@@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier)
 {
 	return -EINVAL;
 }
+
+/*
+ * Use the tile timer to convert nsecs to core clock cycles, relying
+ * on it having the same frequency as SPR_CYCLE.
+ */
+cycles_t ns2cycles(unsigned long nsecs)
+{
+	struct clock_event_device *dev = &__get_cpu_var(tile_timer);
+	return ((u64)nsecs * dev->mult) >> dev->shift;
+}
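
The mult/shift pair on the clock_event_device encodes cycles-per-nanosecond as a fixed-point factor, so the result is approximately nsecs * cycle_rate / 10^9. As an illustrative check only (taking the 750 MHz figure quoted in the hypervisor comment above and assuming a shift of 32): mult would be roughly 0.75 * 2^32, so ns2cycles(1000) would return about 750 cycles, i.e. one microsecond at 750 MHz.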


@@ -15,20 +15,31 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/thread_info.h>
-#include <asm/fixmap.h>
-#include <hv/hypervisor.h>
+#include <asm/timex.h>
 
 void __udelay(unsigned long usecs)
 {
-	hv_nanosleep(usecs * 1000);
+	if (usecs > ULONG_MAX / 1000) {
+		WARN_ON_ONCE(usecs > ULONG_MAX / 1000);
+		usecs = ULONG_MAX / 1000;
+	}
+	__ndelay(usecs * 1000);
 }
 EXPORT_SYMBOL(__udelay);
 
 void __ndelay(unsigned long nsecs)
 {
-	hv_nanosleep(nsecs);
+	cycles_t target = get_cycles();
+	target += ns2cycles(nsecs);
+	while (get_cycles() < target)
+		cpu_relax();
 }
 EXPORT_SYMBOL(__ndelay);
+
+/* FIXME: should be declared in a header somewhere. */
+void __delay(unsigned long cycles)
+{
+	cycles_t target = get_cycles() + cycles;
+	while (get_cycles() < target)
+		cpu_relax();
+}
+EXPORT_SYMBOL(__delay);
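
As a usage note, kernel code normally reaches these routines through the ndelay() and udelay() wrappers from <linux/delay.h> rather than by calling the double-underscore symbols directly; on tile those wrappers end up in the implementations above. A minimal, hypothetical driver-style example:

	#include <linux/delay.h>

	/* Hypothetical helper: give a device register time to settle. */
	static void example_settle_delay(void)
	{
		ndelay(200);	/* sub-microsecond busy-wait */
		udelay(10);	/* roughly 10 microseconds */
	}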