powerpc/64: Use optimized checksum routines on little-endian
Currently we have optimized hand-coded assembly checksum routines for big-endian 64-bit systems, but for little-endian we use the generic C routines. This modifies the optimized routines to work for little-endian. With this, we no longer need to enable CONFIG_GENERIC_CSUM.

This also fixes a couple of comments in checksum_64.S so they accurately reflect what the associated instruction does.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
[mpe: Use the more common __BIG_ENDIAN__]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
b492f7e4e0
commit
d4fde568a3
|
@ -167,7 +167,7 @@ config PPC
|
||||||
select HAVE_CC_STACKPROTECTOR
|
select HAVE_CC_STACKPROTECTOR
|
||||||
|
|
||||||
config GENERIC_CSUM
|
config GENERIC_CSUM
|
||||||
def_bool CPU_LITTLE_ENDIAN
|
def_bool n
|
||||||
|
|
||||||
config EARLY_PRINTK
|
config EARLY_PRINTK
|
||||||
bool
|
bool
|
||||||
|
|
|
@ -70,7 +70,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
|
||||||
|
|
||||||
s += (__force u32)saddr;
|
s += (__force u32)saddr;
|
||||||
s += (__force u32)daddr;
|
s += (__force u32)daddr;
|
||||||
|
#ifdef __BIG_ENDIAN__
|
||||||
s += proto + len;
|
s += proto + len;
|
||||||
|
#else
|
||||||
|
s += (proto + len) << 8;
|
||||||
|
#endif
|
||||||
return (__force __wsum) from64to32(s);
|
return (__force __wsum) from64to32(s);
|
||||||
#else
|
#else
|
||||||
__asm__("\n\
|
__asm__("\n\
|
||||||
|
|
|
@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
|
||||||
obj64-$(CONFIG_SMP) += locks.o
|
obj64-$(CONFIG_SMP) += locks.o
|
||||||
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
|
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
|
||||||
|
|
||||||
ifeq ($(CONFIG_GENERIC_CSUM),)
|
|
||||||
obj-y += checksum_$(BITS).o checksum_wrappers.o
|
obj-y += checksum_$(BITS).o checksum_wrappers.o
|
||||||
endif
|
|
||||||
|
|
||||||
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
|
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
|
||||||
* work to calculate the correct checksum, we ignore that case
|
* work to calculate the correct checksum, we ignore that case
|
||||||
* and take the potential slowdown of unaligned loads.
|
* and take the potential slowdown of unaligned loads.
|
||||||
*/
|
*/
|
||||||
rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
|
rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
|
||||||
beq .Lcsum_aligned
|
beq .Lcsum_aligned
|
||||||
|
|
||||||
li r7,4
|
li r7,4
|
||||||
|
@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
|
||||||
beq .Lcsum_finish
|
beq .Lcsum_finish
|
||||||
|
|
||||||
lbz r6,0(r3)
|
lbz r6,0(r3)
|
||||||
|
#ifdef __BIG_ENDIAN__
|
||||||
sldi r9,r6,8 /* Pad the byte out to 16 bits */
|
sldi r9,r6,8 /* Pad the byte out to 16 bits */
|
||||||
adde r0,r0,r9
|
adde r0,r0,r9
|
||||||
|
#else
|
||||||
|
adde r0,r0,r6
|
||||||
|
#endif
|
||||||
|
|
||||||
.Lcsum_finish:
|
.Lcsum_finish:
|
||||||
addze r0,r0 /* add in final carry */
|
addze r0,r0 /* add in final carry */
|
||||||
|
@ -224,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic)
|
||||||
* If the source and destination are relatively unaligned we only
|
* If the source and destination are relatively unaligned we only
|
||||||
* align the source. This keeps things simple.
|
* align the source. This keeps things simple.
|
||||||
*/
|
*/
|
||||||
rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
|
rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
|
||||||
beq .Lcopy_aligned
|
beq .Lcopy_aligned
|
||||||
|
|
||||||
li r9,4
|
li r9,4
|
||||||
|
@ -386,8 +390,12 @@ dstnr; sth r6,0(r4)
|
||||||
beq .Lcopy_finish
|
beq .Lcopy_finish
|
||||||
|
|
||||||
srcnr; lbz r6,0(r3)
|
srcnr; lbz r6,0(r3)
|
||||||
|
#ifdef __BIG_ENDIAN__
|
||||||
sldi r9,r6,8 /* Pad the byte out to 16 bits */
|
sldi r9,r6,8 /* Pad the byte out to 16 bits */
|
||||||
adde r0,r0,r9
|
adde r0,r0,r9
|
||||||
|
#else
|
||||||
|
adde r0,r0,r6
|
||||||
|
#endif
|
||||||
dstnr; stb r6,0(r4)
|
dstnr; stb r6,0(r4)
|
||||||
|
|
||||||
.Lcopy_finish:
|
.Lcopy_finish:
|
||||||
|
|
Loading…
Reference in New Issue