net: percpu net_device refcount
We tried very hard to remove all possible dev_hold()/dev_put() pairs in the network stack, using RCU conversions. There is still an unavoidable device refcount change for every dst we create/destroy, and this can slow down some workloads (routers or some app servers, mmap af_packet). We can switch to a percpu refcount implementation, now that the dynamic per_cpu infrastructure is mature. On a 64-CPU machine, this consumes 256 bytes per device. On x86, the dev_hold(dev) code is: before: lock incl 0x280(%ebx); after: movl 0x260(%ebx),%eax; incl fs:(%eax). Stress bench (sending 160,000,000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32-bit kernel, FIB_TRIE): Before: real 1m1.662s, user 0m14.373s, sys 12m55.960s. After: real 0m51.179s, user 0m15.329s, sys 10m15.942s. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
f0b9f47251
commit
29b4433d99
|
@ -2701,7 +2701,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
|
||||||
nesibdev = nesvnic->nesibdev;
|
nesibdev = nesvnic->nesibdev;
|
||||||
|
|
||||||
nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
|
nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
|
||||||
atomic_read(&nesvnic->netdev->refcnt));
|
netdev_refcnt_read(nesvnic->netdev));
|
||||||
|
|
||||||
if (nesqp->active_conn) {
|
if (nesqp->active_conn) {
|
||||||
|
|
||||||
|
@ -2791,7 +2791,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
|
||||||
atomic_inc(&cm_accepts);
|
atomic_inc(&cm_accepts);
|
||||||
|
|
||||||
nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
|
nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
|
||||||
atomic_read(&nesvnic->netdev->refcnt));
|
netdev_refcnt_read(nesvnic->netdev));
|
||||||
|
|
||||||
/* allocate the ietf frame and space for private data */
|
/* allocate the ietf frame and space for private data */
|
||||||
nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
|
nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
|
||||||
|
|
|
@ -785,7 +785,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
|
||||||
|
|
||||||
nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
|
nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
|
||||||
nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context,
|
nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context,
|
||||||
atomic_read(&nesvnic->netdev->refcnt));
|
netdev_refcnt_read(nesvnic->netdev));
|
||||||
|
|
||||||
err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
|
err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
|
||||||
nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
|
nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
|
||||||
|
@ -1416,7 +1416,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
|
||||||
/* update the QP table */
|
/* update the QP table */
|
||||||
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
|
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
|
||||||
nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
|
nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
|
||||||
atomic_read(&nesvnic->netdev->refcnt));
|
netdev_refcnt_read(nesvnic->netdev));
|
||||||
|
|
||||||
return &nesqp->ibqp;
|
return &nesqp->ibqp;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1026,7 +1026,7 @@ struct net_device {
|
||||||
struct timer_list watchdog_timer;
|
struct timer_list watchdog_timer;
|
||||||
|
|
||||||
/* Number of references to this device */
|
/* Number of references to this device */
|
||||||
atomic_t refcnt ____cacheline_aligned_in_smp;
|
int __percpu *pcpu_refcnt;
|
||||||
|
|
||||||
/* delayed register/unregister */
|
/* delayed register/unregister */
|
||||||
struct list_head todo_list;
|
struct list_head todo_list;
|
||||||
|
@ -1330,6 +1330,7 @@ static inline void unregister_netdevice(struct net_device *dev)
|
||||||
unregister_netdevice_queue(dev, NULL);
|
unregister_netdevice_queue(dev, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int netdev_refcnt_read(const struct net_device *dev);
|
||||||
extern void free_netdev(struct net_device *dev);
|
extern void free_netdev(struct net_device *dev);
|
||||||
extern void synchronize_net(void);
|
extern void synchronize_net(void);
|
||||||
extern int register_netdevice_notifier(struct notifier_block *nb);
|
extern int register_netdevice_notifier(struct notifier_block *nb);
|
||||||
|
@ -1798,7 +1799,7 @@ extern void netdev_run_todo(void);
|
||||||
*/
|
*/
|
||||||
static inline void dev_put(struct net_device *dev)
|
static inline void dev_put(struct net_device *dev)
|
||||||
{
|
{
|
||||||
atomic_dec(&dev->refcnt);
|
irqsafe_cpu_dec(*dev->pcpu_refcnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1809,7 +1810,7 @@ static inline void dev_put(struct net_device *dev)
|
||||||
*/
|
*/
|
||||||
static inline void dev_hold(struct net_device *dev)
|
static inline void dev_hold(struct net_device *dev)
|
||||||
{
|
{
|
||||||
atomic_inc(&dev->refcnt);
|
irqsafe_cpu_inc(*dev->pcpu_refcnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
|
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
|
||||||
|
|
|
@ -5192,9 +5192,6 @@ int init_dummy_netdev(struct net_device *dev)
|
||||||
*/
|
*/
|
||||||
dev->reg_state = NETREG_DUMMY;
|
dev->reg_state = NETREG_DUMMY;
|
||||||
|
|
||||||
/* initialize the ref count */
|
|
||||||
atomic_set(&dev->refcnt, 1);
|
|
||||||
|
|
||||||
/* NAPI wants this */
|
/* NAPI wants this */
|
||||||
INIT_LIST_HEAD(&dev->napi_list);
|
INIT_LIST_HEAD(&dev->napi_list);
|
||||||
|
|
||||||
|
@ -5202,6 +5199,11 @@ int init_dummy_netdev(struct net_device *dev)
|
||||||
set_bit(__LINK_STATE_PRESENT, &dev->state);
|
set_bit(__LINK_STATE_PRESENT, &dev->state);
|
||||||
set_bit(__LINK_STATE_START, &dev->state);
|
set_bit(__LINK_STATE_START, &dev->state);
|
||||||
|
|
||||||
|
/* Note : We dont allocate pcpu_refcnt for dummy devices,
|
||||||
|
* because users of this 'device' dont need to change
|
||||||
|
* its refcount.
|
||||||
|
*/
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(init_dummy_netdev);
|
EXPORT_SYMBOL_GPL(init_dummy_netdev);
|
||||||
|
@ -5243,6 +5245,16 @@ out:
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(register_netdev);
|
EXPORT_SYMBOL(register_netdev);
|
||||||
|
|
||||||
|
int netdev_refcnt_read(const struct net_device *dev)
|
||||||
|
{
|
||||||
|
int i, refcnt = 0;
|
||||||
|
|
||||||
|
for_each_possible_cpu(i)
|
||||||
|
refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
|
||||||
|
return refcnt;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(netdev_refcnt_read);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* netdev_wait_allrefs - wait until all references are gone.
|
* netdev_wait_allrefs - wait until all references are gone.
|
||||||
*
|
*
|
||||||
|
@ -5257,11 +5269,14 @@ EXPORT_SYMBOL(register_netdev);
|
||||||
static void netdev_wait_allrefs(struct net_device *dev)
|
static void netdev_wait_allrefs(struct net_device *dev)
|
||||||
{
|
{
|
||||||
unsigned long rebroadcast_time, warning_time;
|
unsigned long rebroadcast_time, warning_time;
|
||||||
|
int refcnt;
|
||||||
|
|
||||||
linkwatch_forget_dev(dev);
|
linkwatch_forget_dev(dev);
|
||||||
|
|
||||||
rebroadcast_time = warning_time = jiffies;
|
rebroadcast_time = warning_time = jiffies;
|
||||||
while (atomic_read(&dev->refcnt) != 0) {
|
refcnt = netdev_refcnt_read(dev);
|
||||||
|
|
||||||
|
while (refcnt != 0) {
|
||||||
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
|
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
|
||||||
rtnl_lock();
|
rtnl_lock();
|
||||||
|
|
||||||
|
@ -5288,11 +5303,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
|
||||||
|
|
||||||
msleep(250);
|
msleep(250);
|
||||||
|
|
||||||
|
refcnt = netdev_refcnt_read(dev);
|
||||||
|
|
||||||
if (time_after(jiffies, warning_time + 10 * HZ)) {
|
if (time_after(jiffies, warning_time + 10 * HZ)) {
|
||||||
printk(KERN_EMERG "unregister_netdevice: "
|
printk(KERN_EMERG "unregister_netdevice: "
|
||||||
"waiting for %s to become free. Usage "
|
"waiting for %s to become free. Usage "
|
||||||
"count = %d\n",
|
"count = %d\n",
|
||||||
dev->name, atomic_read(&dev->refcnt));
|
dev->name, refcnt);
|
||||||
warning_time = jiffies;
|
warning_time = jiffies;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5350,7 +5367,7 @@ void netdev_run_todo(void)
|
||||||
netdev_wait_allrefs(dev);
|
netdev_wait_allrefs(dev);
|
||||||
|
|
||||||
/* paranoia */
|
/* paranoia */
|
||||||
BUG_ON(atomic_read(&dev->refcnt));
|
BUG_ON(netdev_refcnt_read(dev));
|
||||||
WARN_ON(rcu_dereference_raw(dev->ip_ptr));
|
WARN_ON(rcu_dereference_raw(dev->ip_ptr));
|
||||||
WARN_ON(dev->ip6_ptr);
|
WARN_ON(dev->ip6_ptr);
|
||||||
WARN_ON(dev->dn_ptr);
|
WARN_ON(dev->dn_ptr);
|
||||||
|
@ -5520,9 +5537,13 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
|
||||||
dev = PTR_ALIGN(p, NETDEV_ALIGN);
|
dev = PTR_ALIGN(p, NETDEV_ALIGN);
|
||||||
dev->padded = (char *)dev - (char *)p;
|
dev->padded = (char *)dev - (char *)p;
|
||||||
|
|
||||||
if (dev_addr_init(dev))
|
dev->pcpu_refcnt = alloc_percpu(int);
|
||||||
|
if (!dev->pcpu_refcnt)
|
||||||
goto free_tx;
|
goto free_tx;
|
||||||
|
|
||||||
|
if (dev_addr_init(dev))
|
||||||
|
goto free_pcpu;
|
||||||
|
|
||||||
dev_mc_init(dev);
|
dev_mc_init(dev);
|
||||||
dev_uc_init(dev);
|
dev_uc_init(dev);
|
||||||
|
|
||||||
|
@ -5553,6 +5574,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
|
||||||
|
|
||||||
free_tx:
|
free_tx:
|
||||||
kfree(tx);
|
kfree(tx);
|
||||||
|
free_pcpu:
|
||||||
|
free_percpu(dev->pcpu_refcnt);
|
||||||
free_p:
|
free_p:
|
||||||
kfree(p);
|
kfree(p);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -5586,6 +5609,9 @@ void free_netdev(struct net_device *dev)
|
||||||
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
|
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
|
||||||
netif_napi_del(p);
|
netif_napi_del(p);
|
||||||
|
|
||||||
|
free_percpu(dev->pcpu_refcnt);
|
||||||
|
dev->pcpu_refcnt = NULL;
|
||||||
|
|
||||||
/* Compatibility with error handling in drivers */
|
/* Compatibility with error handling in drivers */
|
||||||
if (dev->reg_state == NETREG_UNINITIALIZED) {
|
if (dev->reg_state == NETREG_UNINITIALIZED) {
|
||||||
kfree((char *)dev - dev->padded);
|
kfree((char *)dev - dev->padded);
|
||||||
|
|
Loading…
Reference in New Issue