[NET]: Implement SKB fast cloning.
Protocols that make extensive use of SKB cloning, for example TCP, eat at least 2 allocations per packet sent as a result.

To cut the kmalloc() count in half, we implement a pre-allocation scheme wherein we allocate 2 sk_buff objects in advance, then use a simple reference count to free up the memory at the correct time.

Based upon an initial patch by Thomas Graf and suggestions from Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent e92ae93a8a
commit d179cd1292
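The scheme is easiest to see as a memory layout. Below is a minimal userspace sketch of the idea, not kernel code: the names (skb_stub, alloc_fclone) are hypothetical, and a plain int stands in for the kernel's atomic_t. Each object in the new skbuff_fclone_cache carries two buffer heads back to back plus a shared reference count, so a later clone is satisfied with pointer arithmetic instead of a second allocator call.

#include <stdlib.h>

/* Stand-in for struct sk_buff; only the fclone state matters here. */
enum { FCLONE_UNAVAILABLE, FCLONE_ORIG, FCLONE_CLONE };

struct skb_stub {
        int fclone;
        /* ... real sk_buff fields elided ... */
};

/*
 * One fclone-cache object, mirroring the patch's slab object size of
 * 2*sizeof(struct sk_buff) + sizeof(atomic_t):
 *
 *   [ parent skb ][ child skb ][ shared refcount ]
 */
struct skb_stub *alloc_fclone(void)
{
        struct skb_stub *skb = malloc(2 * sizeof(*skb) + sizeof(int));
        struct skb_stub *child;
        int *fclone_ref;

        if (!skb)
                return NULL;
        child = skb + 1;                    /* same arithmetic as the patch */
        fclone_ref = (int *)(child + 1);

        skb->fclone = FCLONE_ORIG;
        child->fclone = FCLONE_UNAVAILABLE; /* slot free for a future clone */
        *fclone_ref = 1;                    /* the parent holds one reference */
        return skb;
}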
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -162,6 +162,13 @@ struct skb_timeval {
         u32     off_usec;
 };
 
+
+enum {
+        SKB_FCLONE_UNAVAILABLE,
+        SKB_FCLONE_ORIG,
+        SKB_FCLONE_CLONE,
+};
+
 /**
  *      struct sk_buff - socket buffer
  *      @next: Next buffer in list
@@ -255,7 +262,8 @@ struct sk_buff {
                                 ip_summed:2,
                                 nohdr:1,
                                 nfctinfo:3;
-        __u8                    pkt_type;
+        __u8                    pkt_type:3,
+                                fclone:2;
         __be16                  protocol;
 
         void                    (*destructor)(struct sk_buff *skb);
@@ -295,8 +303,20 @@ struct sk_buff {
 #include <asm/system.h>
 
 extern void            __kfree_skb(struct sk_buff *skb);
-extern struct sk_buff *alloc_skb(unsigned int size,
-                                 unsigned int __nocast priority);
+extern struct sk_buff *__alloc_skb(unsigned int size,
+                                   unsigned int __nocast priority, int fclone);
+static inline struct sk_buff *alloc_skb(unsigned int size,
+                                        unsigned int __nocast priority)
+{
+        return __alloc_skb(size, priority, 0);
+}
+
+static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
+                                               unsigned int __nocast priority)
+{
+        return __alloc_skb(size, priority, 1);
+}
+
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                             unsigned int size,
                                             unsigned int __nocast priority);
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
         int hdr_len;
 
         hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
-        skb = alloc_skb(size + hdr_len, gfp);
+        skb = alloc_skb_fclone(size + hdr_len, gfp);
         if (skb) {
                 skb->truesize += mem;
                 if (sk->sk_forward_alloc >= (int)skb->truesize ||
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
 #include <asm/system.h>
 
 static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_fclone_cache;
 
 struct timeval __read_mostly skb_tv_base;
 
@@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 
 /**
- *      alloc_skb       -       allocate a network buffer
+ *      __alloc_skb     -       allocate a network buffer
  *      @size: size to allocate
  *      @gfp_mask: allocation mask
  *
@@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *      Buffers may only be allocated from interrupts using a @gfp_mask of
  *      %GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+                            int fclone)
 {
         struct sk_buff *skb;
         u8 *data;
 
         /* Get the HEAD */
-        skb = kmem_cache_alloc(skbuff_head_cache,
-                               gfp_mask & ~__GFP_DMA);
+        if (fclone)
+                skb = kmem_cache_alloc(skbuff_fclone_cache,
+                                       gfp_mask & ~__GFP_DMA);
+        else
+                skb = kmem_cache_alloc(skbuff_head_cache,
+                                       gfp_mask & ~__GFP_DMA);
+
         if (!skb)
                 goto out;
 
@@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
         skb->data = data;
         skb->tail = data;
         skb->end  = data + size;
+        if (fclone) {
+                struct sk_buff *child = skb + 1;
+                atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
+                skb->fclone = SKB_FCLONE_ORIG;
+                atomic_set(fclone_ref, 1);
+
+                child->fclone = SKB_FCLONE_UNAVAILABLE;
+        }
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->tso_size = 0;
@@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+        struct sk_buff *other;
+        atomic_t *fclone_ref;
+
         skb_release_data(skb);
-        kmem_cache_free(skbuff_head_cache, skb);
+        switch (skb->fclone) {
+        case SKB_FCLONE_UNAVAILABLE:
+                kmem_cache_free(skbuff_head_cache, skb);
+                break;
+
+        case SKB_FCLONE_ORIG:
+                fclone_ref = (atomic_t *) (skb + 2);
+                if (atomic_dec_and_test(fclone_ref))
+                        kmem_cache_free(skbuff_fclone_cache, skb);
+                break;
+
+        case SKB_FCLONE_CLONE:
+                fclone_ref = (atomic_t *) (skb + 1);
+                other = skb - 1;
+
+                /* The clone portion is available for
+                 * fast-cloning again.
+                 */
+                skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+                if (atomic_dec_and_test(fclone_ref))
+                        kmem_cache_free(skbuff_fclone_cache, other);
+                break;
+        };
 }
 
 /**
@@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb)
 
 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
-        struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
-
-        if (!n)
-                return NULL;
+        struct sk_buff *n;
+
+        n = skb + 1;
+        if (skb->fclone == SKB_FCLONE_ORIG &&
+            n->fclone == SKB_FCLONE_UNAVAILABLE) {
+                atomic_t *fclone_ref = (atomic_t *) (n + 1);
+                n->fclone = SKB_FCLONE_CLONE;
+                atomic_inc(fclone_ref);
+        } else {
+                n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+                if (!n)
+                        return NULL;
+                n->fclone = SKB_FCLONE_UNAVAILABLE;
+        }
 
 #define C(x) n->x = skb->x
 
@@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
         new->mac.raw    = old->mac.raw + offset;
         memcpy(new->cb, old->cb, sizeof(old->cb));
         new->local_df   = old->local_df;
+        new->fclone     = SKB_FCLONE_UNAVAILABLE;
         new->pkt_type   = old->pkt_type;
         new->tstamp     = old->tstamp;
         new->destructor = NULL;
@@ -1647,13 +1699,23 @@ void __init skb_init(void)
                                               NULL, NULL);
         if (!skbuff_head_cache)
                 panic("cannot create skbuff cache");
 
+        skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+                                                (2*sizeof(struct sk_buff)) +
+                                                sizeof(atomic_t),
+                                                0,
+                                                SLAB_HWCACHE_ALIGN,
+                                                NULL, NULL);
+        if (!skbuff_fclone_cache)
+                panic("cannot create skbuff cache");
+
         do_gettimeofday(&skb_tv_base);
 }
 
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
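Continuing the userspace sketch from above (same hypothetical names, non-atomic refcount), clone and free mirror the skb_clone() fast path and the kfree_skbmem() switch this patch adds:

/* Fast-path clone: reuse the preallocated child if it is free. */
struct skb_stub *clone_fclone(struct skb_stub *skb)
{
        struct skb_stub *n = skb + 1;

        if (skb->fclone == FCLONE_ORIG &&
            n->fclone == FCLONE_UNAVAILABLE) {
                int *fclone_ref = (int *)(n + 1);

                n->fclone = FCLONE_CLONE;
                (*fclone_ref)++;   /* the clone takes a reference */
                return n;          /* no allocator call at all */
        }
        return NULL;  /* caller would fall back to a normal allocation */
}

/* Free half of the pair; the object is released only on the last ref. */
void free_fclone(struct skb_stub *skb)
{
        int *fclone_ref;
        struct skb_stub *other;

        switch (skb->fclone) {
        case FCLONE_ORIG:
                fclone_ref = (int *)(skb + 2);  /* skip child, hit refcount */
                if (--*fclone_ref == 0)
                        free(skb);
                break;
        case FCLONE_CLONE:
                fclone_ref = (int *)(skb + 1);
                other = skb - 1;                /* back up to the parent */
                skb->fclone = FCLONE_UNAVAILABLE; /* reusable for fast-cloning */
                if (--*fclone_ref == 0)
                        free(other);
                break;
        default:
                free(skb);  /* an ordinary buffer from the regular cache */
                break;
        }
}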
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk)
         } else {
                 /* Socket is locked, keep trying until memory is available. */
                 for (;;) {
-                        skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+                        skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
                         if (skb)
                                 break;
                         yield();
@@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk)
 
         tcp_connect_init(sk);
 
-        buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+        buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
         if (unlikely(buff == NULL))
                 return -ENOBUFS;
 
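A possible end-to-end walk through the sketch, tracing the refcount the way TCP's transmit path does once tcp_connect() and tcp_send_fin() allocate with alloc_skb_fclone():

int main(void)
{
        struct skb_stub *orig = alloc_fclone();  /* the one allocation */
        struct skb_stub *copy;

        if (!orig)
                return 1;
        copy = clone_fclone(orig);  /* second buffer comes for free */

        free_fclone(copy);  /* ref 2 -> 1: pair kept, child reusable */
        free_fclone(orig);  /* ref 1 -> 0: whole object released */
        return 0;
}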