Merge branch 'sparc64-early-boot-timestamp'

Pavel Tatashin says:

====================
sparc64: Early boot timestamp

Changelog:
	v2 - v3:
		- __aligned(64) -> __cacheline_aligned
		- Replaced in sched_clock() wmb() with barrier()
	v1 - v2:
		- Early boot timestamps are now available on all 64-bit
		  sparc processors
		- New hot-patched get_tick() function.

This patch set:
- enables early boot timestamps on SPARC,
- adds offset so we count time from zero, the same as it is done on other
  platforms
- improves the performance by inling, hot patching, and combining loads
  into the same cacheline. (and few other optimizations).

So, the final performance of sched_clock() is faster than now: the fewer
number of loads, and all of them are coming from the same cacheline. Loads
can run while we are reading tick value, and we do not do function call.

Current sched_clock():
sethi  %hi(0xb9b400), %g1
ldx  [ %g1 + 0x250 ], %g1
ldx  [ %g1 ], %g1
call  %g1
nop
sethi  %hi(0xb9b400), %g1
ldx  [ %g1 + 0x300 ], %g1
mulx  %o0, %g1, %g1
rett  %i7 + 8
srlx  %g1, 0xa, %o0

Final sched_clock():
sethi  %hi(0xb9b400), %g1
ldx  [ %g1 + 0x2c8 ], %g2
ldx  [ %g1 + 0x2c0 ], %g1
b  42f638 <sched_clock+0x44>
rd  %asr24, %i0
...
sllx  %i0, 1, %i0
srlx  %i0, 1, %i0
mulx  %i0, %g1, %i0
srlx  %i0, 0xa, %i0
rett  %i7 + 8
sub  %o0, %g2, %o0
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-06-12 15:44:11 -07:00
commit 9300d9c4ad
6 changed files with 181 additions and 58 deletions

View File

@ -1,5 +1,5 @@
/*
* Just a place holder.
* Just a place holder.
*/
#ifndef _SPARC_SETUP_H
#define _SPARC_SETUP_H

View File

@ -9,7 +9,12 @@
#include <linux/types.h>
#include <linux/init.h>
/* The most frequently accessed fields should be first,
* to fit into the same cacheline.
*/
struct sparc64_tick_ops {
unsigned long ticks_per_nsec_quotient;
unsigned long offset;
unsigned long long (*get_tick)(void);
int (*add_compare)(unsigned long);
unsigned long softint_mask;
@ -17,6 +22,8 @@ struct sparc64_tick_ops {
void (*init_tick)(void);
unsigned long (*add_tick)(unsigned long);
unsigned long (*get_frequency)(void);
unsigned long frequency;
char *name;
};
@ -27,4 +34,64 @@ unsigned long sparc64_get_clock_tick(unsigned int cpu);
void setup_sparc64_timer(void);
void __init time_init(void);
#define TICK_PRIV_BIT BIT(63)
#define TICKCMP_IRQ_BIT BIT(63)
#define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL
#define HBIRD_STICK_ADDR 0x1fe0000f070UL
#define GET_TICK_NINSTR 13
struct get_tick_patch {
unsigned int addr;
unsigned int tick[GET_TICK_NINSTR];
unsigned int stick[GET_TICK_NINSTR];
};
extern struct get_tick_patch __get_tick_patch;
extern struct get_tick_patch __get_tick_patch_end;
static inline unsigned long get_tick(void)
{
unsigned long tick, tmp1, tmp2;
__asm__ __volatile__(
/* read hbtick 13 instructions */
"661:\n"
" mov 0x1fe, %1\n"
" sllx %1, 0x20, %1\n"
" sethi %%hi(0xf000), %2\n"
" or %2, 0x70, %2\n"
" or %1, %2, %1\n" /* %1 = HBIRD_STICK_ADDR */
" add %1, 8, %2\n"
" ldxa [%2]%3, %0\n"
" ldxa [%1]%3, %1\n"
" ldxa [%2]%3, %2\n"
" sub %2, %0, %0\n" /* don't modify %xcc */
" brnz,pn %0, 661b\n" /* restart to save one register */
" sllx %2, 32, %2\n"
" or %2, %1, %0\n"
/* Common/not patched code */
" sllx %0, 1, %0\n"
" srlx %0, 1, %0\n" /* Clear TICK_PRIV_BIT */
/* Beginning of patch section */
" .section .get_tick_patch, \"ax\"\n"
" .word 661b\n"
/* read tick 2 instructions and 11 skipped */
" ba 1f\n"
" rd %%tick, %0\n"
" .skip 4 * (%4 - 2)\n"
"1:\n"
/* read stick 2 instructions and 11 skipped */
" ba 1f\n"
" rd %%asr24, %0\n"
" .skip 4 * (%4 - 2)\n"
"1:\n"
/* End of patch section */
" .previous\n"
: "=&r" (tick), "=&r" (tmp1), "=&r" (tmp2)
: "i" (ASI_PHYS_BYPASS_EC_E), "i" (GET_TICK_NINSTR));
return tick;
}
#endif /* _SPARC64_TIMER_H */

View File

@ -52,6 +52,9 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs);
void do_signal32(struct pt_regs * regs);
asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp);
/* time_64.c */
void __init time_init_early(void);
/* compat_audit.c */
extern unsigned int sparc32_dir_class[];
extern unsigned int sparc32_chattr_class[];

View File

@ -95,7 +95,7 @@ static struct console prom_early_console = {
.index = -1,
};
/*
/*
* Process kernel command line switches that are specific to the
* SPARC or that require special low-level processing.
*/
@ -365,6 +365,7 @@ void __init start_early_boot(void)
}
current_thread_info()->cpu = cpu;
time_init_early();
prom_init_report();
start_kernel();
}
@ -639,7 +640,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
#endif
task_thread_info(&init_task)->kregs = &fake_swapper_regs;
@ -648,7 +649,7 @@ void __init setup_arch(char **cmdline_p)
if (!ic_set_manually) {
phandle chosen = prom_finddevice("/chosen");
u32 cl, sv, gw;
cl = prom_getintdefault (chosen, "client-ip", 0);
sv = prom_getintdefault (chosen, "server-ip", 0);
gw = prom_getintdefault (chosen, "gateway-ip", 0);

View File

@ -47,14 +47,13 @@
#include <asm/cpudata.h>
#include <linux/uaccess.h>
#include <asm/irq_regs.h>
#include <asm/cacheflush.h>
#include "entry.h"
#include "kernel.h"
DEFINE_SPINLOCK(rtc_lock);
#define TICK_PRIV_BIT (1UL << 63)
#define TICKCMP_IRQ_BIT (1UL << 63)
#ifdef CONFIG_SMP
unsigned long profile_pc(struct pt_regs *regs)
{
@ -164,13 +163,19 @@ static unsigned long tick_add_tick(unsigned long adj)
return new_tick;
}
static struct sparc64_tick_ops tick_operations __read_mostly = {
static unsigned long tick_get_frequency(void)
{
return local_cpu_data().clock_tick;
}
static struct sparc64_tick_ops tick_operations __cacheline_aligned = {
.name = "tick",
.init_tick = tick_init_tick,
.disable_irq = tick_disable_irq,
.get_tick = tick_get_tick,
.add_tick = tick_add_tick,
.add_compare = tick_add_compare,
.get_frequency = tick_get_frequency,
.softint_mask = 1UL << 0,
};
@ -250,6 +255,11 @@ static int stick_add_compare(unsigned long adj)
return ((long)(new_tick - (orig_tick+adj))) > 0L;
}
static unsigned long stick_get_frequency(void)
{
return prom_getint(prom_root_node, "stick-frequency");
}
static struct sparc64_tick_ops stick_operations __read_mostly = {
.name = "stick",
.init_tick = stick_init_tick,
@ -257,6 +267,7 @@ static struct sparc64_tick_ops stick_operations __read_mostly = {
.get_tick = stick_get_tick,
.add_tick = stick_add_tick,
.add_compare = stick_add_compare,
.get_frequency = stick_get_frequency,
.softint_mask = 1UL << 16,
};
@ -277,9 +288,6 @@ static struct sparc64_tick_ops stick_operations __read_mostly = {
* 2) write high
* 3) write low
*/
#define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL
#define HBIRD_STICK_ADDR 0x1fe0000f070UL
static unsigned long __hbird_read_stick(void)
{
unsigned long ret, tmp1, tmp2, tmp3;
@ -381,6 +389,13 @@ static int hbtick_add_compare(unsigned long adj)
return ((long)(val2 - val)) > 0L;
}
static unsigned long hbtick_get_frequency(void)
{
struct device_node *dp = of_find_node_by_path("/");
return of_getintprop_default(dp, "stick-frequency", 0);
}
static struct sparc64_tick_ops hbtick_operations __read_mostly = {
.name = "hbtick",
.init_tick = hbtick_init_tick,
@ -388,11 +403,10 @@ static struct sparc64_tick_ops hbtick_operations __read_mostly = {
.get_tick = hbtick_get_tick,
.add_tick = hbtick_add_tick,
.add_compare = hbtick_add_compare,
.get_frequency = hbtick_get_frequency,
.softint_mask = 1UL << 0,
};
static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
unsigned long cmos_regs;
EXPORT_SYMBOL(cmos_regs);
@ -582,34 +596,17 @@ static int __init clock_init(void)
*/
fs_initcall(clock_init);
/* This is gets the master TICK_INT timer going. */
static unsigned long sparc64_init_timers(void)
/* Return true if this is Hummingbird, aka Ultra-IIe */
static bool is_hummingbird(void)
{
struct device_node *dp;
unsigned long freq;
unsigned long ver, manuf, impl;
dp = of_find_node_by_path("/");
if (tlb_type == spitfire) {
unsigned long ver, manuf, impl;
__asm__ __volatile__ ("rdpr %%ver, %0"
: "=&r" (ver));
manuf = ((ver >> 48) & 0xffff);
impl = ((ver >> 32) & 0xffff);
__asm__ __volatile__ ("rdpr %%ver, %0"
: "=&r" (ver));
manuf = ((ver >> 48) & 0xffff);
impl = ((ver >> 32) & 0xffff);
if (manuf == 0x17 && impl == 0x13) {
/* Hummingbird, aka Ultra-IIe */
tick_ops = &hbtick_operations;
freq = of_getintprop_default(dp, "stick-frequency", 0);
} else {
tick_ops = &tick_operations;
freq = local_cpu_data().clock_tick;
}
} else {
tick_ops = &stick_operations;
freq = of_getintprop_default(dp, "stick-frequency", 0);
}
return freq;
return (manuf == 0x17 && impl == 0x13);
}
struct freq_table {
@ -671,12 +668,12 @@ core_initcall(register_sparc64_cpufreq_notifier);
static int sparc64_next_event(unsigned long delta,
struct clock_event_device *evt)
{
return tick_ops->add_compare(delta) ? -ETIME : 0;
return tick_operations.add_compare(delta) ? -ETIME : 0;
}
static int sparc64_timer_shutdown(struct clock_event_device *evt)
{
tick_ops->disable_irq();
tick_operations.disable_irq();
return 0;
}
@ -693,7 +690,7 @@ static DEFINE_PER_CPU(struct clock_event_device, sparc64_events);
void __irq_entry timer_interrupt(int irq, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
unsigned long tick_mask = tick_ops->softint_mask;
unsigned long tick_mask = tick_operations.softint_mask;
int cpu = smp_processor_id();
struct clock_event_device *evt = &per_cpu(sparc64_events, cpu);
@ -728,7 +725,7 @@ void setup_sparc64_timer(void)
: "=r" (pstate)
: "i" (PSTATE_IE));
tick_ops->init_tick();
tick_operations.init_tick();
/* Restore PSTATE_IE. */
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
@ -755,12 +752,10 @@ static unsigned long tb_ticks_per_usec __read_mostly;
void __delay(unsigned long loops)
{
unsigned long bclock, now;
unsigned long bclock = get_tick();
bclock = tick_ops->get_tick();
do {
now = tick_ops->get_tick();
} while ((now-bclock) < loops);
while ((get_tick() - bclock) < loops)
;
}
EXPORT_SYMBOL(__delay);
@ -772,26 +767,71 @@ EXPORT_SYMBOL(udelay);
static u64 clocksource_tick_read(struct clocksource *cs)
{
return tick_ops->get_tick();
return get_tick();
}
static void __init get_tick_patch(void)
{
unsigned int *addr, *instr, i;
struct get_tick_patch *p;
if (tlb_type == spitfire && is_hummingbird())
return;
for (p = &__get_tick_patch; p < &__get_tick_patch_end; p++) {
instr = (tlb_type == spitfire) ? p->tick : p->stick;
addr = (unsigned int *)(unsigned long)p->addr;
for (i = 0; i < GET_TICK_NINSTR; i++) {
addr[i] = instr[i];
/* ensure that address is modified before flush */
wmb();
flushi(&addr[i]);
}
}
}
static void init_tick_ops(struct sparc64_tick_ops *ops)
{
unsigned long freq, quotient, tick;
freq = ops->get_frequency();
quotient = clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
tick = ops->get_tick();
ops->offset = (tick * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT;
ops->ticks_per_nsec_quotient = quotient;
ops->frequency = freq;
tick_operations = *ops;
get_tick_patch();
}
void __init time_init_early(void)
{
if (tlb_type == spitfire) {
if (is_hummingbird())
init_tick_ops(&hbtick_operations);
else
init_tick_ops(&tick_operations);
} else {
init_tick_ops(&stick_operations);
}
}
void __init time_init(void)
{
unsigned long freq = sparc64_init_timers();
unsigned long freq;
freq = tick_operations.frequency;
tb_ticks_per_usec = freq / USEC_PER_SEC;
timer_ticks_per_nsec_quotient =
clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
clocksource_tick.name = tick_ops->name;
clocksource_tick.name = tick_operations.name;
clocksource_tick.read = clocksource_tick_read;
clocksource_register_hz(&clocksource_tick, freq);
printk("clocksource: mult[%x] shift[%d]\n",
clocksource_tick.mult, clocksource_tick.shift);
sparc64_clockevent.name = tick_ops->name;
sparc64_clockevent.name = tick_operations.name;
clockevents_calc_mult_shift(&sparc64_clockevent, freq, 4);
sparc64_clockevent.max_delta_ns =
@ -809,14 +849,21 @@ void __init time_init(void)
unsigned long long sched_clock(void)
{
unsigned long ticks = tick_ops->get_tick();
unsigned long quotient = tick_operations.ticks_per_nsec_quotient;
unsigned long offset = tick_operations.offset;
return (ticks * timer_ticks_per_nsec_quotient)
>> SPARC64_NSEC_PER_CYC_SHIFT;
/* Use barrier so the compiler emits the loads first and overlaps load
* latency with reading tick, because reading %tick/%stick is a
* post-sync instruction that will flush and restart subsequent
* instructions after it commits.
*/
barrier();
return ((get_tick() * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT) - offset;
}
int read_current_timer(unsigned long *timer_val)
{
*timer_val = tick_ops->get_tick();
*timer_val = get_tick();
return 0;
}

View File

@ -149,6 +149,11 @@ SECTIONS
*(.sun_m7_2insn_patch)
__sun_m7_2insn_patch_end = .;
}
.get_tick_patch : {
__get_tick_patch = .;
*(.get_tick_patch)
__get_tick_patch_end = .;
}
PERCPU_SECTION(SMP_CACHE_BYTES)
#ifdef CONFIG_JUMP_LABEL