From b0c39dbdc204006ef3558a66716ff09797619778 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 24 Sep 2009 09:59:19 -0600 Subject: [PATCH] virtio_net: don't free buffers in xmit ring The virtio_net driver is complicated by the two methods of freeing old xmit buffers (in addition to freeing old ones at the start of the xmit path). The original code used a 1/10 second timer attached to xmit_free(), reset on every xmit. Before we orphaned skbs on xmit, the transmitting userspace could block with a full socket until the timer fired, the skb destructor was called, and they were re-woken. So we added the VIRTIO_F_NOTIFY_ON_EMPTY feature: supporting devices send an interrupt (even if normally suppressed) on an empty xmit ring which makes us schedule xmit_tasklet(). This was a benchmark win. Unfortunately, VIRTIO_F_NOTIFY_ON_EMPTY makes quite a lot of work: a host which is faster than the guest will fire the interrupt every xmit packet (slowing the guest down further). Attempting mitigation in the host adds overhead of userspace timers (possibly with the additional pain of signals), and risks increasing latency anyway if you get it wrong. In practice, this effect was masked by benchmarks which take advantage of GSO (with its inherent transmit batching), but it's still there. Now we orphan xmitted skbs, the pressure is off: remove both paths and no longer request VIRTIO_F_NOTIFY_ON_EMPTY. Note that the current QEMU will notify us even if we don't negotiate this feature (legal, but suboptimal); a patch is outstanding to improve that. Move the skb_orphan/nf_reset to after we've done the send and notified the other end, for a slight optimization. Signed-off-by: Rusty Russell Cc: Mark McLoughlin --- drivers/net/virtio_net.c | 64 ++++------------------------------------ 1 file changed, 5 insertions(+), 59 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 222f3d098ae4..3041e4eddb3b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -48,16 +48,9 @@ struct virtnet_info struct napi_struct napi; unsigned int status; - /* If we need to free in a timer, this is it. */ - struct timer_list xmit_free_timer; - /* Number of input buffers, and max we've ever had. */ unsigned int num, max; - /* For cleaning up after transmission. */ - struct tasklet_struct tasklet; - bool free_in_tasklet; - /* I like... big packets and I cannot lie! */ bool big_packets; @@ -116,9 +109,6 @@ static void skb_xmit_done(struct virtqueue *svq) /* We were probably waiting for more output buffers. */ netif_wake_queue(vi->dev); - - if (vi->free_in_tasklet) - tasklet_schedule(&vi->tasklet); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -458,25 +448,9 @@ static void free_old_xmit_skbs(struct virtnet_info *vi) } } -/* If the virtio transport doesn't always notify us when all in-flight packets - * are consumed, we fall back to using this function on a timer to free them. */ -static void xmit_free(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock(vi->dev); - - free_old_xmit_skbs(vi); - - if (!skb_queue_empty(&vi->send)) - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - - netif_tx_unlock(vi->dev); -} - static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { - int num, err; + int num; struct scatterlist sg[2+MAX_SKB_FRAGS]; struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); @@ -522,25 +496,7 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) sg_set_buf(sg, hdr, sizeof(*hdr)); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; - - err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); - if (err >= 0 && !vi->free_in_tasklet) { - /* Don't wait up for transmitted skbs to be freed. */ - skb_orphan(skb); - nf_reset(skb); - mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10)); - } - - return err; -} - -static void xmit_tasklet(unsigned long data) -{ - struct virtnet_info *vi = (void *)data; - - netif_tx_lock_bh(vi->dev); - free_old_xmit_skbs(vi); - netif_tx_unlock_bh(vi->dev); + return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); } static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -555,6 +511,9 @@ again: __skb_queue_head(&vi->send, skb); if (likely(xmit_skb(vi, skb) >= 0)) { vi->svq->vq_ops->kick(vi->svq); + /* Don't wait up for transmitted skbs to be freed. */ + skb_orphan(skb); + nf_reset(skb); return NETDEV_TX_OK; } @@ -903,10 +862,6 @@ static int virtnet_probe(struct virtio_device *vdev) vi->pages = NULL; INIT_DELAYED_WORK(&vi->refill, refill_work); - /* If they give us a callback when all buffers are done, we don't need - * the timer. */ - vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); - /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) @@ -938,11 +893,6 @@ static int virtnet_probe(struct virtio_device *vdev) skb_queue_head_init(&vi->recv); skb_queue_head_init(&vi->send); - tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi); - - if (!vi->free_in_tasklet) - setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi); - err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -983,9 +933,6 @@ static void virtnet_remove(struct virtio_device *vdev) /* Stop all the virtqueues. */ vdev->config->reset(vdev); - if (!vi->free_in_tasklet) - del_timer_sync(&vi->xmit_free_timer); - /* Free our skbs in send and recv queues, if any. */ while ((skb = __skb_dequeue(&vi->recv)) != NULL) { kfree_skb(skb); @@ -1019,7 +966,6 @@ static unsigned int features[] = { VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = {