bcache: Delete some slower inline asm
Never saw a profile of bset_search_tree() where it wasn't bottlenecked on memory until I got my new Haswell machine, but when I tried it there it was suddenly burning 20% of the CPU in the inner loop on shrd... Turns out, the version of shrd that takes 64-bit operands has a 9-cycle latency. Hah. Signed-off-by: Kent Overstreet <kmo@daterainc.com>
This commit is contained in:
parent
28935ab516
commit
098fb25498
|
@ -481,16 +481,8 @@ static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
|
|||
|
||||
/*
 * shrd128() - shift the 128-bit quantity high:low right, return the low word.
 *
 * @high:  upper 64 bits of the 128-bit value
 * @low:   lower 64 bits of the 128-bit value
 * @shift: number of bit positions to shift right; must be in the range 0..63
 *
 * Returns the low 64 bits of (high:low) >> shift: low shifted right by
 * @shift, with the vacated upper bits filled from the bottom of @high.
 *
 * This is deliberately plain C on every architecture.  The x86-64 shrd
 * instruction computes the same thing, but its 64-bit operand form has a
 * 9 cycle latency and showed up as a hot spot in the bset_search_tree()
 * inner loop; two shifts and an OR are cheaper.
 *
 * A @shift of 64 or more is undefined behaviour in C (shift count >= width
 * of the type), so callers must keep it below 64.
 */
static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
{
	low >>= shift;

	/*
	 * The left shift of high is split into "<< 1" followed by
	 * "<< (63 - shift)" so that shift == 0 shifts by 1 + 63 in two
	 * well-defined steps (yielding 0) rather than by 64 in one step,
	 * which would be undefined.
	 */
	low |= (high << 1) << (63U - shift);

	return low;
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue