powerpc: Remove duplicate cacheable_memcpy/memzero functions
These functions are only used from one place each. If the cacheable_* versions really are more efficient, then those changes should be migrated into the common code instead.

NOTE: The old routines are just flat buggy on kernels that support hardware with different cacheline sizes.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
parent
9eccca0843
commit
b05ae4ee60
|
@ -76,9 +76,6 @@ extern void _set_L3CR(unsigned long);
|
||||||
#define _set_L3CR(val) do { } while(0)
|
#define _set_L3CR(val) do { } while(0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern void cacheable_memzero(void *p, unsigned int nb);
|
|
||||||
extern void *cacheable_memcpy(void *, const void *, unsigned int);
|
|
||||||
|
|
||||||
#endif /* !__ASSEMBLY__ */
|
#endif /* !__ASSEMBLY__ */
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
#endif /* _ASM_POWERPC_CACHE_H */
|
#endif /* _ASM_POWERPC_CACHE_H */
|
||||||
|
|
|
@ -69,54 +69,6 @@ CACHELINE_BYTES = L1_CACHE_BYTES
|
||||||
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
|
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
|
||||||
CACHELINE_MASK = (L1_CACHE_BYTES-1)
|
CACHELINE_MASK = (L1_CACHE_BYTES-1)
|
||||||
|
|
||||||
/*
|
|
||||||
* Use dcbz on the complete cache lines in the destination
|
|
||||||
* to set them to zero. This requires that the destination
|
|
||||||
* area is cacheable. -- paulus
|
|
||||||
*/
|
|
||||||
_GLOBAL(cacheable_memzero)
|
|
||||||
mr r5,r4
|
|
||||||
li r4,0
|
|
||||||
addi r6,r3,-4
|
|
||||||
cmplwi 0,r5,4
|
|
||||||
blt 7f
|
|
||||||
stwu r4,4(r6)
|
|
||||||
beqlr
|
|
||||||
andi. r0,r6,3
|
|
||||||
add r5,r0,r5
|
|
||||||
subf r6,r0,r6
|
|
||||||
clrlwi r7,r6,32-LG_CACHELINE_BYTES
|
|
||||||
add r8,r7,r5
|
|
||||||
srwi r9,r8,LG_CACHELINE_BYTES
|
|
||||||
addic. r9,r9,-1 /* total number of complete cachelines */
|
|
||||||
ble 2f
|
|
||||||
xori r0,r7,CACHELINE_MASK & ~3
|
|
||||||
srwi. r0,r0,2
|
|
||||||
beq 3f
|
|
||||||
mtctr r0
|
|
||||||
4: stwu r4,4(r6)
|
|
||||||
bdnz 4b
|
|
||||||
3: mtctr r9
|
|
||||||
li r7,4
|
|
||||||
10: dcbz r7,r6
|
|
||||||
addi r6,r6,CACHELINE_BYTES
|
|
||||||
bdnz 10b
|
|
||||||
clrlwi r5,r8,32-LG_CACHELINE_BYTES
|
|
||||||
addi r5,r5,4
|
|
||||||
2: srwi r0,r5,2
|
|
||||||
mtctr r0
|
|
||||||
bdz 6f
|
|
||||||
1: stwu r4,4(r6)
|
|
||||||
bdnz 1b
|
|
||||||
6: andi. r5,r5,3
|
|
||||||
7: cmpwi 0,r5,0
|
|
||||||
beqlr
|
|
||||||
mtctr r5
|
|
||||||
addi r6,r6,3
|
|
||||||
8: stbu r4,1(r6)
|
|
||||||
bdnz 8b
|
|
||||||
blr
|
|
||||||
|
|
||||||
_GLOBAL(memset)
|
_GLOBAL(memset)
|
||||||
rlwimi r4,r4,8,16,23
|
rlwimi r4,r4,8,16,23
|
||||||
rlwimi r4,r4,16,0,15
|
rlwimi r4,r4,16,0,15
|
||||||
|
@ -142,85 +94,6 @@ _GLOBAL(memset)
|
||||||
bdnz 8b
|
bdnz 8b
|
||||||
blr
|
blr
|
||||||
|
|
||||||
/*
|
|
||||||
* This version uses dcbz on the complete cache lines in the
|
|
||||||
* destination area to reduce memory traffic. This requires that
|
|
||||||
* the destination area is cacheable.
|
|
||||||
* We only use this version if the source and dest don't overlap.
|
|
||||||
* -- paulus.
|
|
||||||
*/
|
|
||||||
_GLOBAL(cacheable_memcpy)
|
|
||||||
add r7,r3,r5 /* test if the src & dst overlap */
|
|
||||||
add r8,r4,r5
|
|
||||||
cmplw 0,r4,r7
|
|
||||||
cmplw 1,r3,r8
|
|
||||||
crand 0,0,4 /* cr0.lt &= cr1.lt */
|
|
||||||
blt memcpy /* if regions overlap */
|
|
||||||
|
|
||||||
addi r4,r4,-4
|
|
||||||
addi r6,r3,-4
|
|
||||||
neg r0,r3
|
|
||||||
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
|
|
||||||
beq 58f
|
|
||||||
|
|
||||||
cmplw 0,r5,r0 /* is this more than total to do? */
|
|
||||||
blt 63f /* if not much to do */
|
|
||||||
andi. r8,r0,3 /* get it word-aligned first */
|
|
||||||
subf r5,r0,r5
|
|
||||||
mtctr r8
|
|
||||||
beq+ 61f
|
|
||||||
70: lbz r9,4(r4) /* do some bytes */
|
|
||||||
stb r9,4(r6)
|
|
||||||
addi r4,r4,1
|
|
||||||
addi r6,r6,1
|
|
||||||
bdnz 70b
|
|
||||||
61: srwi. r0,r0,2
|
|
||||||
mtctr r0
|
|
||||||
beq 58f
|
|
||||||
72: lwzu r9,4(r4) /* do some words */
|
|
||||||
stwu r9,4(r6)
|
|
||||||
bdnz 72b
|
|
||||||
|
|
||||||
58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
|
|
||||||
clrlwi r5,r5,32-LG_CACHELINE_BYTES
|
|
||||||
li r11,4
|
|
||||||
mtctr r0
|
|
||||||
beq 63f
|
|
||||||
53:
|
|
||||||
dcbz r11,r6
|
|
||||||
COPY_16_BYTES
|
|
||||||
#if L1_CACHE_BYTES >= 32
|
|
||||||
COPY_16_BYTES
|
|
||||||
#if L1_CACHE_BYTES >= 64
|
|
||||||
COPY_16_BYTES
|
|
||||||
COPY_16_BYTES
|
|
||||||
#if L1_CACHE_BYTES >= 128
|
|
||||||
COPY_16_BYTES
|
|
||||||
COPY_16_BYTES
|
|
||||||
COPY_16_BYTES
|
|
||||||
COPY_16_BYTES
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
bdnz 53b
|
|
||||||
|
|
||||||
63: srwi. r0,r5,2
|
|
||||||
mtctr r0
|
|
||||||
beq 64f
|
|
||||||
30: lwzu r0,4(r4)
|
|
||||||
stwu r0,4(r6)
|
|
||||||
bdnz 30b
|
|
||||||
|
|
||||||
64: andi. r0,r5,3
|
|
||||||
mtctr r0
|
|
||||||
beq+ 65f
|
|
||||||
40: lbz r0,4(r4)
|
|
||||||
stb r0,4(r6)
|
|
||||||
addi r4,r4,1
|
|
||||||
addi r6,r6,1
|
|
||||||
bdnz 40b
|
|
||||||
65: blr
|
|
||||||
|
|
||||||
_GLOBAL(memmove)
|
_GLOBAL(memmove)
|
||||||
cmplw 0,r3,r4
|
cmplw 0,r3,r4
|
||||||
bgt backwards_memcpy
|
bgt backwards_memcpy
|
||||||
|
|
|
@ -8,10 +8,6 @@ EXPORT_SYMBOL(memset);
|
||||||
EXPORT_SYMBOL(memmove);
|
EXPORT_SYMBOL(memmove);
|
||||||
EXPORT_SYMBOL(memcmp);
|
EXPORT_SYMBOL(memcmp);
|
||||||
EXPORT_SYMBOL(memchr);
|
EXPORT_SYMBOL(memchr);
|
||||||
#ifdef CONFIG_PPC32
|
|
||||||
EXPORT_SYMBOL(cacheable_memcpy);
|
|
||||||
EXPORT_SYMBOL(cacheable_memzero);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
EXPORT_SYMBOL(strcpy);
|
EXPORT_SYMBOL(strcpy);
|
||||||
EXPORT_SYMBOL(strncpy);
|
EXPORT_SYMBOL(strncpy);
|
||||||
|
|
|
@ -224,7 +224,7 @@ void __init MMU_init_hw(void)
|
||||||
*/
|
*/
|
||||||
if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
|
if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
|
||||||
Hash = __va(memblock_alloc(Hash_size, Hash_size));
|
Hash = __va(memblock_alloc(Hash_size, Hash_size));
|
||||||
cacheable_memzero(Hash, Hash_size);
|
memset(Hash, 0, Hash_size);
|
||||||
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
|
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
|
||||||
|
|
||||||
Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
|
Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
|
||||||
|
|
|
@ -79,13 +79,6 @@ MODULE_AUTHOR
|
||||||
("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>");
|
("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>");
|
||||||
MODULE_LICENSE("GPL");
|
MODULE_LICENSE("GPL");
|
||||||
|
|
||||||
/*
|
|
||||||
* PPC64 doesn't (yet) have a cacheable_memcpy
|
|
||||||
*/
|
|
||||||
#ifdef CONFIG_PPC64
|
|
||||||
#define cacheable_memcpy(d,s,n) memcpy((d),(s),(n))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* minimum number of free TX descriptors required to wake up TX process */
|
/* minimum number of free TX descriptors required to wake up TX process */
|
||||||
#define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4)
|
#define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4)
|
||||||
|
|
||||||
|
@ -1673,7 +1666,7 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot)
|
||||||
dev_kfree_skb(dev->rx_sg_skb);
|
dev_kfree_skb(dev->rx_sg_skb);
|
||||||
dev->rx_sg_skb = NULL;
|
dev->rx_sg_skb = NULL;
|
||||||
} else {
|
} else {
|
||||||
cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb),
|
memcpy(skb_tail_pointer(dev->rx_sg_skb),
|
||||||
dev->rx_skb[slot]->data, len);
|
dev->rx_skb[slot]->data, len);
|
||||||
skb_put(dev->rx_sg_skb, len);
|
skb_put(dev->rx_sg_skb, len);
|
||||||
emac_recycle_rx_skb(dev, slot, len);
|
emac_recycle_rx_skb(dev, slot, len);
|
||||||
|
@ -1730,8 +1723,7 @@ static int emac_poll_rx(void *param, int budget)
|
||||||
goto oom;
|
goto oom;
|
||||||
|
|
||||||
skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2);
|
skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2);
|
||||||
cacheable_memcpy(copy_skb->data - 2, skb->data - 2,
|
memcpy(copy_skb->data - 2, skb->data - 2, len + 2);
|
||||||
len + 2);
|
|
||||||
emac_recycle_rx_skb(dev, slot, len);
|
emac_recycle_rx_skb(dev, slot, len);
|
||||||
skb = copy_skb;
|
skb = copy_skb;
|
||||||
} else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC)))
|
} else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC)))
|
||||||
|
|
Loading…
Reference in New Issue