sparc64: optimize loads in sched_clock()

In sched_clock() we now have three loads:
	- the function pointer
	- the quotient for the multiplication
	- the offset

However, it is possible to improve performance substantially by
guaranteeing that all three loads come from the same cacheline.

We guarantee this by moving these three values to the front of
sparc64_tick_ops and by making tick_operations 64-byte aligned.
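For illustration, a minimal sketch of the layout argument (a
hypothetical user-space mock-up, not the kernel structure; the
aligned attribute stands in for the kernel's __cacheline_aligned):
unsigned long and function pointers are 8 bytes each on sparc64, so
the three hot fields occupy bytes 0..23 of a single 64-byte line.

	#include <stddef.h>
	#include <stdio.h>

	/* Hot fields first; assumes an 8-byte unsigned long and a
	 * 64-byte L1 cacheline, as on sparc64.
	 */
	struct tick_ops_sketch {
		unsigned long ticks_per_nsec_quotient;	/* bytes  0..7  */
		unsigned long offset;			/* bytes  8..15 */
		unsigned long long (*get_tick)(void);	/* bytes 16..23 */
		/* colder fields would follow here */
	} __attribute__((aligned(64)));

	int main(void)
	{
		/* all three hot offsets are < 64, i.e. one cacheline */
		printf("%zu %zu %zu\n",
		       offsetof(struct tick_ops_sketch, ticks_per_nsec_quotient),
		       offsetof(struct tick_ops_sketch, offset),
		       offsetof(struct tick_ops_sketch, get_tick));
		return 0;
	}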

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h
--- a/arch/sparc/include/asm/timer_64.h
+++ b/arch/sparc/include/asm/timer_64.h

@@ -9,7 +9,12 @@
 #include <linux/types.h>
 #include <linux/init.h>
 
+/* The most frequently accessed fields should be first,
+ * to fit into the same cacheline.
+ */
 struct sparc64_tick_ops {
+	unsigned long ticks_per_nsec_quotient;
+	unsigned long offset;
 	unsigned long long (*get_tick)(void);
 	int (*add_compare)(unsigned long);
 	unsigned long softint_mask;

diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c

@@ -164,7 +164,7 @@ static unsigned long tick_add_tick(unsigned long adj)
 	return new_tick;
 }
 
-static struct sparc64_tick_ops tick_operations __read_mostly = {
+static struct sparc64_tick_ops tick_operations __cacheline_aligned = {
 	.name = "tick",
 	.init_tick = tick_init_tick,
 	.disable_irq = tick_disable_irq,
@@ -391,9 +391,6 @@ static struct sparc64_tick_ops hbtick_operations __read_mostly = {
 	.softint_mask = 1UL << 0,
 };
 
-static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
-static unsigned long timer_offset __read_mostly;
-
 unsigned long cmos_regs;
 EXPORT_SYMBOL(cmos_regs);
@@ -784,11 +781,11 @@ void __init time_init(void)
 
 	tb_ticks_per_usec = freq / USEC_PER_SEC;
 
-	timer_ticks_per_nsec_quotient =
+	tick_operations.ticks_per_nsec_quotient =
 		clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
 
-	timer_offset = (tick_operations.get_tick()
-			* timer_ticks_per_nsec_quotient)
-			>> SPARC64_NSEC_PER_CYC_SHIFT;
+	tick_operations.offset = (tick_operations.get_tick()
+				  * tick_operations.ticks_per_nsec_quotient)
+				  >> SPARC64_NSEC_PER_CYC_SHIFT;
 
 	clocksource_tick.name = tick_operations.name;
@@ -816,11 +813,11 @@ void __init time_init(void)
 
 unsigned long long sched_clock(void)
 {
+	unsigned long quotient = tick_operations.ticks_per_nsec_quotient;
+	unsigned long offset = tick_operations.offset;
 	unsigned long ticks = tick_operations.get_tick();
 
-	return ((ticks * timer_ticks_per_nsec_quotient)
-		>> SPARC64_NSEC_PER_CYC_SHIFT)
-		- timer_offset;
+	return ((ticks * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT) - offset;
 }
 
 int read_current_timer(unsigned long *timer_val)
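
As background (not part of this patch), the conversion in
sched_clock() is ordinary clocksource fixed-point arithmetic: the
quotient approximates (NSEC_PER_SEC << shift) / freq, so
ticks * quotient >> shift yields nanoseconds, and the offset
computed in time_init() anchors the result near zero at boot. A
minimal user-space sketch with assumed values (1 GHz tick rate; 10
standing in for SPARC64_NSEC_PER_CYC_SHIFT; 64-bit unsigned long):

	#include <stdio.h>

	#define NSEC_PER_SEC	1000000000UL
	#define SHIFT		10	/* stand-in for SPARC64_NSEC_PER_CYC_SHIFT */

	int main(void)
	{
		unsigned long freq = 1000000000UL;	/* assumed 1 GHz tick */
		/* same rounding that clocksource_hz2mult() applies */
		unsigned long quotient =
			((NSEC_PER_SEC << SHIFT) + freq / 2) / freq;
		unsigned long ticks = 5000000UL;	/* example counter value */

		/* at 1 GHz a tick is one nanosecond: prints 5000000 ns */
		printf("%lu ns\n", (ticks * quotient) >> SHIFT);
		return 0;
	}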