x86-64: support native xadd rwsem implementation
This one is much faster than the spinlock based fallback rwsem code, with certain artifical benchmarks having shown 300%+ improvement on threaded page faults etc. Again, note the 32767-thread limit here. So this really does need that whole "make rwsem_count_t be 64-bit and fix the BIAS values to match" extension on top of it, but that is conceptually a totally independent issue. NOT TESTED! The original patch that this all was based on were tested by KAMEZAWA Hiroyuki, but maybe I screwed up something when I created the cleaned-up series, so caveat emptor.. Also note that it _may_ be a good idea to mark some more registers clobbered on x86-64 in the inline asms instead of saving/restoring them. They are inline functions, but they are only used in places where there are not a lot of live registers _anyway_, so doing for example the clobbers of %r8-%r11 in the asm wouldn't make the fast-path code any worse, and would make the slow-path code smaller. (Not that the slow-path really matters to that degree. Saving a few unnecessary registers is the _least_ of our problems when we hit the slow path. The instruction/cycle counting really only matters in the fast path). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> LKML-Reference: <alpine.LFD.2.00.1001121810410.17145@localhost.localdomain> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
This commit is contained in:
parent
5d0b7235d8
commit
bafaecd11d
|
@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT
|
|||
|
||||
config X86_XADD
|
||||
def_bool y
|
||||
depends on X86_32 && !M386
|
||||
depends on X86_64 || !M386
|
||||
|
||||
config X86_PPRO_FENCE
|
||||
bool "PentiumPro memory ordering errata workaround"
|
||||
|
|
|
@ -39,4 +39,5 @@ else
|
|||
lib-y += thunk_64.o clear_page_64.o copy_page_64.o
|
||||
lib-y += memmove_64.o memset_64.o
|
||||
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
|
||||
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
|
||||
endif
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* x86-64 rwsem wrappers
|
||||
*
|
||||
* This interfaces the inline asm code to the slow-path
|
||||
* C routines. We need to save the call-clobbered regs
|
||||
* that the asm does not mark as clobbered, and move the
|
||||
* argument from %rax to %rdi.
|
||||
*
|
||||
* NOTE! We don't need to save %rax, because the functions
|
||||
* will always return the semaphore pointer in %rax (which
|
||||
* is also the input argument to these helpers)
|
||||
*
|
||||
* The following can clobber %rdx because the asm clobbers it:
|
||||
* call_rwsem_down_write_failed
|
||||
* call_rwsem_wake
|
||||
* but %rdi, %rsi, %rcx, %r8-r11 always need saving.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/rwlock.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
#include <asm/frame.h>
|
||||
#include <asm/dwarf2.h>
|
||||
|
||||
#define save_common_regs \
|
||||
pushq %rdi; \
|
||||
pushq %rsi; \
|
||||
pushq %rcx; \
|
||||
pushq %r8; \
|
||||
pushq %r9; \
|
||||
pushq %r10; \
|
||||
pushq %r11
|
||||
|
||||
#define restore_common_regs \
|
||||
popq %r11; \
|
||||
popq %r10; \
|
||||
popq %r9; \
|
||||
popq %r8; \
|
||||
popq %rcx; \
|
||||
popq %rsi; \
|
||||
popq %rdi
|
||||
|
||||
/* Fix up special calling conventions */
|
||||
ENTRY(call_rwsem_down_read_failed)
|
||||
save_common_regs
|
||||
pushq %rdx
|
||||
movq %rax,%rdi
|
||||
call rwsem_down_read_failed
|
||||
popq %rdx
|
||||
restore_common_regs
|
||||
ret
|
||||
ENDPROC(call_rwsem_down_read_failed)
|
||||
|
||||
ENTRY(call_rwsem_down_write_failed)
|
||||
save_common_regs
|
||||
movq %rax,%rdi
|
||||
call rwsem_down_write_failed
|
||||
restore_common_regs
|
||||
ret
|
||||
ENDPROC(call_rwsem_down_write_failed)
|
||||
|
||||
ENTRY(call_rwsem_wake)
|
||||
decw %dx /* do nothing if still outstanding active readers */
|
||||
jnz 1f
|
||||
save_common_regs
|
||||
movq %rax,%rdi
|
||||
call rwsem_wake
|
||||
restore_common_regs
|
||||
1: ret
|
||||
ENDPROC(call_rwsem_wake)
|
||||
|
||||
/* Fix up special calling conventions */
|
||||
ENTRY(call_rwsem_downgrade_wake)
|
||||
save_common_regs
|
||||
pushq %rdx
|
||||
movq %rax,%rdi
|
||||
call rwsem_downgrade_wake
|
||||
popq %rdx
|
||||
restore_common_regs
|
||||
ret
|
||||
ENDPROC(call_rwsem_downgrade_wake)
|
Loading…
Reference in New Issue