2005-10-21 00:44:46 +08:00
|
|
|
#ifndef _ASM_POWERPC_CHECKSUM_H
|
|
|
|
#define _ASM_POWERPC_CHECKSUM_H
|
2005-12-17 05:43:46 +08:00
|
|
|
#ifdef __KERNEL__
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
|
2013-09-23 10:04:51 +08:00
|
|
|
#ifdef CONFIG_GENERIC_CSUM
|
|
|
|
#include <asm-generic/checksum.h>
|
|
|
|
#else
|
powerpc/64: optimises from64to32()
The current implementation of from64to32() gives a poor result:
0000000000000270 <.from64to32>:
270: 38 00 ff ff li r0,-1
274: 78 69 00 22 rldicl r9,r3,32,32
278: 78 00 00 20 clrldi r0,r0,32
27c: 7c 60 00 38 and r0,r3,r0
280: 7c 09 02 14 add r0,r9,r0
284: 78 09 00 22 rldicl r9,r0,32,32
288: 7c 00 4a 14 add r0,r0,r9
28c: 78 03 00 20 clrldi r3,r0,32
290: 4e 80 00 20 blr
This patch modifies from64to32() to operate in the same
spirit as csum_fold()
It swaps the two 32-bit halves of sum then it adds it with the
unswapped sum. If there is a carry from adding the two 32-bit halves,
it will carry from the lower half into the upper half, giving us the
correct sum in the upper half.
The resulting code is:
0000000000000260 <.from64to32>:
260: 78 60 00 02 rotldi r0,r3,32
264: 7c 60 1a 14 add r3,r0,r3
268: 78 63 00 22 rldicl r3,r3,32,32
26c: 4e 80 00 20 blr
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-04-10 14:34:35 +08:00
|
|
|
#include <linux/bitops.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2005-10-21 00:44:46 +08:00
|
|
|
* Computes the checksum of a memory block at src, length len,
|
|
|
|
* and adds in "sum" (32-bit), while copying the block to dst.
|
|
|
|
* If an access exception occurs on src or dst, it stores -EFAULT
|
|
|
|
* to *src_err or *dst_err respectively (if that pointer is not
|
|
|
|
* NULL), and, for an error on src, zeroes the rest of dst.
|
|
|
|
*
|
|
|
|
* Like csum_partial, this must be called with even lengths,
|
|
|
|
* except for the last fragment.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2006-11-15 13:21:58 +08:00
|
|
|
extern __wsum csum_partial_copy_generic(const void *src, void *dst,
|
|
|
|
int len, __wsum sum,
|
2005-04-17 06:20:36 +08:00
|
|
|
int *src_err, int *dst_err);
|
2010-08-03 04:09:52 +08:00
|
|
|
|
|
|
|
#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
|
|
|
|
extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
|
|
|
|
int len, __wsum sum, int *err_ptr);
|
2010-08-03 04:11:36 +08:00
|
|
|
#define HAVE_CSUM_COPY_USER
|
|
|
|
extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
|
|
|
|
int len, __wsum sum, int *err_ptr);
|
2005-10-21 00:44:46 +08:00
|
|
|
|
|
|
|
/*
 * Checksum-and-copy without fault reporting: forwards to
 * csum_partial_copy_generic() with both error pointers set to NULL.
 */
#define csum_partial_copy_nocheck(src, dst, len, sum)			\
	csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL)
|
|
|
|
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* turns a 32-bit partial checksum (e.g. from csum_partial) into a
|
|
|
|
* 1's complement 16-bit checksum.
|
|
|
|
*/
|
2006-11-15 13:21:58 +08:00
|
|
|
static inline __sum16 csum_fold(__wsum sum)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
unsigned int tmp;
|
|
|
|
|
|
|
|
/* swap the two 16-bit halves of sum */
|
|
|
|
__asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum));
|
|
|
|
/* if there is a carry from adding the two 16-bit halves,
|
|
|
|
it will carry from the lower half into the upper half,
|
|
|
|
giving us the correct sum in the upper half. */
|
2006-11-15 13:21:58 +08:00
|
|
|
return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2016-11-03 13:10:55 +08:00
|
|
|
/*
 * Fold a 64-bit accumulator down to 32 bits.
 *
 * x is added to a copy of itself whose two 32-bit halves are swapped.
 * A carry out of the lower-half addition propagates into the upper
 * half, so the upper 32 bits of the total hold the folded sum.
 */
static inline u32 from64to32(u64 x)
{
	u64 swapped = ror64(x, 32);

	return (x + swapped) >> 32;
}
|
|
|
|
|
2016-10-27 22:30:06 +08:00
|
|
|
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
|
|
|
|
__u8 proto, __wsum sum)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2006-11-15 13:21:58 +08:00
|
|
|
#ifdef __powerpc64__
|
2016-11-03 13:10:55 +08:00
|
|
|
u64 s = (__force u32)sum;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-11-15 13:21:58 +08:00
|
|
|
s += (__force u32)saddr;
|
|
|
|
s += (__force u32)daddr;
|
2016-11-03 13:15:42 +08:00
|
|
|
#ifdef __BIG_ENDIAN__
|
2006-11-15 13:21:58 +08:00
|
|
|
s += proto + len;
|
2016-11-03 13:15:42 +08:00
|
|
|
#else
|
|
|
|
s += (proto + len) << 8;
|
|
|
|
#endif
|
2016-11-03 13:10:55 +08:00
|
|
|
return (__force __wsum) from64to32(s);
|
2005-10-21 00:44:46 +08:00
|
|
|
#else
|
|
|
|
__asm__("\n\
|
|
|
|
addc %0,%0,%1 \n\
|
|
|
|
adde %0,%0,%2 \n\
|
|
|
|
adde %0,%0,%3 \n\
|
|
|
|
addze %0,%0 \n\
|
|
|
|
"
|
|
|
|
: "=r" (sum)
|
2006-11-15 13:21:58 +08:00
|
|
|
: "r" (daddr), "r"(saddr), "r"(proto + len), "0"(sum));
|
|
|
|
return sum;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
2006-11-15 13:21:58 +08:00
|
|
|
}
|
2013-09-23 10:04:51 +08:00
|
|
|
|
2015-05-19 23:18:55 +08:00
|
|
|
/*
|
|
|
|
* computes the checksum of the TCP/UDP pseudo-header
|
|
|
|
* returns a 16-bit checksum, already complemented
|
|
|
|
*/
|
2016-10-27 22:30:06 +08:00
|
|
|
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
|
|
|
|
__u8 proto, __wsum sum)
|
2015-05-19 23:18:55 +08:00
|
|
|
{
|
|
|
|
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
|
|
|
|
}
|
|
|
|
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
#define HAVE_ARCH_CSUM_ADD
|
|
|
|
static inline __wsum csum_add(__wsum csum, __wsum addend)
|
|
|
|
{
|
|
|
|
#ifdef __powerpc64__
|
|
|
|
u64 res = (__force u64)csum;
|
2015-09-22 22:34:34 +08:00
|
|
|
#endif
|
|
|
|
if (__builtin_constant_p(csum) && csum == 0)
|
|
|
|
return addend;
|
|
|
|
if (__builtin_constant_p(addend) && addend == 0)
|
|
|
|
return csum;
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
|
2015-09-22 22:34:34 +08:00
|
|
|
#ifdef __powerpc64__
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
res += (__force u64)addend;
|
Revert "powerpc/64: Fix checksum folding in csum_add()"
This reverts commit 6ad966d7303b70165228dba1ee8da1a05c10eefe.
That commit was pointless, because csum_add() sums two 32 bits
values, so the sum is 0x1fffffffe at the maximum.
And then when adding upper part (1) and lower part (0xfffffffe),
the result is 0xffffffff which doesn't carry.
Any lower value will not carry either.
And behind the fact that this commit is useless, it also kills the
whole purpose of having an arch specific inline csum_add()
because the resulting code gets even worse than what is obtained
with the generic implementation of csum_add()
0000000000000240 <.csum_add>:
240: 38 00 ff ff li r0,-1
244: 7c 84 1a 14 add r4,r4,r3
248: 78 00 00 20 clrldi r0,r0,32
24c: 78 89 00 22 rldicl r9,r4,32,32
250: 7c 80 00 38 and r0,r4,r0
254: 7c 09 02 14 add r0,r9,r0
258: 78 09 00 22 rldicl r9,r0,32,32
25c: 7c 00 4a 14 add r0,r0,r9
260: 78 03 00 20 clrldi r3,r0,32
264: 4e 80 00 20 blr
In comparison, the generic implementation of csum_add() gives:
0000000000000290 <.csum_add>:
290: 7c 63 22 14 add r3,r3,r4
294: 7f 83 20 40 cmplw cr7,r3,r4
298: 7c 10 10 26 mfocrf r0,1
29c: 54 00 ef fe rlwinm r0,r0,29,31,31
2a0: 7c 60 1a 14 add r3,r0,r3
2a4: 78 63 00 20 clrldi r3,r3,32
2a8: 4e 80 00 20 blr
And the reverted implementation for PPC64 gives:
0000000000000240 <.csum_add>:
240: 7c 84 1a 14 add r4,r4,r3
244: 78 80 00 22 rldicl r0,r4,32,32
248: 7c 80 22 14 add r4,r0,r4
24c: 78 83 00 20 clrldi r3,r4,32
250: 4e 80 00 20 blr
Fixes: 6ad966d7303b7 ("powerpc/64: Fix checksum folding in csum_add()")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-04-10 14:34:37 +08:00
|
|
|
return (__force __wsum)((u32)res + (res >> 32));
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
#else
|
|
|
|
asm("addc %0,%0,%1;"
|
|
|
|
"addze %0,%0;"
|
2015-09-22 22:34:21 +08:00
|
|
|
: "+r" (csum) : "r" (addend) : "xer");
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
return csum;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2015-09-22 22:34:25 +08:00
|
|
|
/*
|
|
|
|
* This is a version of ip_compute_csum() optimized for IP headers,
|
|
|
|
* which always checksum on 4 octet boundaries. ihl is the number
|
|
|
|
* of 32-bit words and is always >= 5.
|
|
|
|
*/
|
|
|
|
static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
|
|
|
|
{
|
|
|
|
const u32 *ptr = (const u32 *)iph + 1;
|
|
|
|
#ifdef __powerpc64__
|
|
|
|
unsigned int i;
|
|
|
|
u64 s = *(const u32 *)iph;
|
|
|
|
|
|
|
|
for (i = 0; i < ihl - 1; i++, ptr++)
|
|
|
|
s += *ptr;
|
2016-11-03 13:10:55 +08:00
|
|
|
return (__force __wsum)from64to32(s);
|
2015-09-22 22:34:25 +08:00
|
|
|
#else
|
|
|
|
__wsum sum, tmp;
|
|
|
|
|
|
|
|
asm("mtctr %3;"
|
|
|
|
"addc %0,%4,%5;"
|
|
|
|
"1: lwzu %1, 4(%2);"
|
|
|
|
"adde %0,%0,%1;"
|
|
|
|
"bdnz 1b;"
|
|
|
|
"addze %0,%0;"
|
|
|
|
: "=r" (sum), "=r" (tmp), "+b" (ptr)
|
|
|
|
: "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
|
|
|
|
: "ctr", "xer", "memory");
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
|
|
|
{
|
|
|
|
return csum_fold(ip_fast_csum_nofold(iph, ihl));
|
|
|
|
}
|
|
|
|
|
2016-03-08 01:44:37 +08:00
|
|
|
/*
|
|
|
|
* computes the checksum of a memory block at buff, length len,
|
|
|
|
* and adds in "sum" (32-bit)
|
|
|
|
*
|
|
|
|
* returns a 32-bit number suitable for feeding into itself
|
|
|
|
* or csum_tcpudp_magic
|
|
|
|
*
|
|
|
|
* this function must be called with even lengths, except
|
|
|
|
* for the last fragment, which may be odd
|
|
|
|
*
|
|
|
|
* it's best to have buff aligned on a 32-bit boundary
|
|
|
|
*/
|
|
|
|
__wsum __csum_partial(const void *buff, int len, __wsum sum);
|
|
|
|
|
|
|
|
/*
 * Inline front end to __csum_partial().
 *
 * When len is a compile-time constant, short even lengths (<= 16) are
 * summed inline with csum_add(), and other multiples of 4 go through
 * ip_fast_csum_nofold().  Everything else is handed to __csum_partial().
 */
static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
{
	if (!__builtin_constant_p(len))
		return __csum_partial(buff, len, sum);

	if (len <= 16 && (len & 1) == 0) {
		/*
		 * For each 4-byte slot: add a full 32-bit word if len covers
		 * it, or a 16-bit word if len ends halfway through it.
		 */
		if (len >= 4)
			sum = csum_add(sum, (__force __wsum)*(const u32 *)buff);
		else if (len == 2)
			sum = csum_add(sum, (__force __wsum)*(const u16 *)buff);

		if (len >= 8)
			sum = csum_add(sum, (__force __wsum)
					    *(const u32 *)(buff + 4));
		else if (len == 6)
			sum = csum_add(sum, (__force __wsum)
					    *(const u16 *)(buff + 4));

		if (len >= 12)
			sum = csum_add(sum, (__force __wsum)
					    *(const u32 *)(buff + 8));
		else if (len == 10)
			sum = csum_add(sum, (__force __wsum)
					    *(const u16 *)(buff + 8));

		if (len >= 16)
			sum = csum_add(sum, (__force __wsum)
					    *(const u32 *)(buff + 12));
		else if (len == 14)
			sum = csum_add(sum, (__force __wsum)
					    *(const u16 *)(buff + 12));

		return sum;
	}

	if ((len & 3) == 0)
		return csum_add(sum, ip_fast_csum_nofold(buff, len >> 2));

	return __csum_partial(buff, len, sum);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* this routine is used for miscellaneous IP-like checksums, mainly
|
|
|
|
* in icmp.c
|
|
|
|
*/
|
|
|
|
static inline __sum16 ip_compute_csum(const void *buff, int len)
|
|
|
|
{
|
|
|
|
return csum_fold(csum_partial(buff, len, 0));
|
|
|
|
}
|
|
|
|
|
2013-09-23 10:04:51 +08:00
|
|
|
#endif
|
2005-12-17 05:43:46 +08:00
|
|
|
#endif /* __KERNEL__ */
|
2005-10-21 00:44:46 +08:00
|
|
|
#endif
|