arm64: csum: Optimise IPv6 header checksum
Throwing our __uint128_t idioms at csum_ipv6_magic() makes it about 1.3x-2x faster across a range of microarchitecture/compiler combinations. Not much in absolute terms, but every little helps. Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
This commit is contained in:
parent
27afb236fe
commit
e9c7ddbf8b
|
@ -5,7 +5,12 @@
|
|||
#ifndef __ASM_CHECKSUM_H
|
||||
#define __ASM_CHECKSUM_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/in6.h>
|
||||
|
||||
#define _HAVE_ARCH_IPV6_CSUM
|
||||
/*
 * csum_ipv6_magic - arch-provided checksum over IPv6 addressing fields.
 * @saddr: first 128-bit address folded into the sum (source, by name)
 * @daddr: second 128-bit address folded into the sum (destination, by name)
 * @len:   length value in host byte order; the implementation applies htonl()
 * @proto: 8-bit protocol value added to the sum
 * @sum:   partial checksum the other fields are added onto
 *
 * Returns the 16-bit result produced by csum_fold().
 */
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
|
||||
const struct in6_addr *daddr,
|
||||
__u32 len, __u8 proto, __wsum sum);
|
||||
|
||||
static inline __sum16 csum_fold(__wsum csum)
|
||||
{
|
||||
|
|
|
@ -124,3 +124,30 @@ unsigned int do_csum(const unsigned char *buff, int len)
|
|||
|
||||
return sum >> 16;
|
||||
}
|
||||
|
||||
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
|
||||
const struct in6_addr *daddr,
|
||||
__u32 len, __u8 proto, __wsum csum)
|
||||
{
|
||||
__uint128_t src, dst;
|
||||
u64 sum = (__force u64)csum;
|
||||
|
||||
src = *(const __uint128_t *)saddr->s6_addr;
|
||||
dst = *(const __uint128_t *)daddr->s6_addr;
|
||||
|
||||
sum += (__force u32)htonl(len);
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
sum += (u32)proto << 24;
|
||||
#else
|
||||
sum += proto;
|
||||
#endif
|
||||
src += (src >> 64) | (src << 64);
|
||||
dst += (dst >> 64) | (dst << 64);
|
||||
|
||||
sum = accumulate(sum, src >> 64);
|
||||
sum = accumulate(sum, dst >> 64);
|
||||
|
||||
sum += ((sum >> 32) | (sum << 32));
|
||||
return csum_fold((__force __wsum)(sum >> 32));
|
||||
}
|
||||
EXPORT_SYMBOL(csum_ipv6_magic);
|
||||
|
|
Loading…
Reference in New Issue