diff --git a/Documentation/networking/net_failover.rst b/Documentation/networking/net_failover.rst index d4513ad31809..70ca2f5800c4 100644 --- a/Documentation/networking/net_failover.rst +++ b/Documentation/networking/net_failover.rst @@ -24,3 +24,93 @@ This can be used by paravirtual drivers to enable an alternate low latency datapath. It also enables hypervisor controlled live migration of a VM with direct attached VF by failing over to the paravirtual datapath when the VF is unplugged. + +virtio-net accelerated datapath: STANDBY mode +============================================= + +net_failover enables hypervisor controlled accelerated datapath to virtio-net +enabled VMs in a transparent manner with no/minimal guest userspace chanages. + +To support this, the hypervisor needs to enable VIRTIO_NET_F_STANDBY +feature on the virtio-net interface and assign the same MAC address to both +virtio-net and VF interfaces. + +Here is an example XML snippet that shows such configuration. + + + + + + + + +
+ + + + +
+ +
+ + +Booting a VM with the above configuration will result in the following 3 +netdevs created in the VM. + +4: ens10: mtu 1500 qdisc noqueue state UP group default qlen 1000 + link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff + inet 192.168.12.53/24 brd 192.168.12.255 scope global dynamic ens10 + valid_lft 42482sec preferred_lft 42482sec + inet6 fe80::97d8:db2:8c10:b6d6/64 scope link + valid_lft forever preferred_lft forever +5: ens10nsby: mtu 1500 qdisc fq_codel master ens10 state UP group default qlen 1000 + link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff +7: ens11: mtu 1500 qdisc mq master ens10 state UP group default qlen 1000 + link/ether 52:54:00:00:12:53 brd ff:ff:ff:ff:ff:ff + +ens10 is the 'failover' master netdev, ens10nsby and ens11 are the slave +'standby' and 'primary' netdevs respectively. + +Live Migration of a VM with SR-IOV VF & virtio-net in STANDBY mode +================================================================== + +net_failover also enables hypervisor controlled live migration to be supported +with VMs that have direct attached SR-IOV VF devices by automatic failover to +the paravirtual datapath when the VF is unplugged. + +Here is a sample script that shows the steps to initiate live migration on +the source hypervisor. + +# cat vf_xml + + + +
+ +
+ + +# Source Hypervisor +#!/bin/bash + +DOMAIN=fedora27-tap01 +PF=enp66s0f0 +VF_NUM=5 +TAP_IF=tap01 +VF_XML= + +MAC=52:54:00:00:12:53 +ZERO_MAC=00:00:00:00:00:00 + +virsh domif-setlink $DOMAIN $TAP_IF up +bridge fdb del $MAC dev $PF master +virsh detach-device $DOMAIN $VF_XML +ip link set $PF vf $VF_NUM mac $ZERO_MAC + +virsh migrate --live $DOMAIN qemu+ssh://$REMOTE_HOST/system + +# Destination Hypervisor +#!/bin/bash + +virsh attach-device $DOMAIN $VF_XML +virsh domif-setlink $DOMAIN $TAP_IF down diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 2cdaff90a9ec..d03775100f7d 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -332,6 +332,7 @@ config VETH config VIRTIO_NET tristate "Virtio network driver" depends on VIRTIO + select NET_FAILOVER ---help--- This is the virtual network driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fe4534ee50a1..8f08a3e1bbaa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -30,8 +30,11 @@ #include #include #include +#include +#include #include #include +#include static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); @@ -210,6 +213,9 @@ struct virtnet_info { u32 speed; unsigned long guest_offloads; + + /* failover when STANDBY feature enabled */ + struct failover *failover; }; struct padded_vnet_hdr { @@ -1554,6 +1560,9 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) struct sockaddr *addr; struct scatterlist sg; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) + return -EOPNOTSUPP; + addr = kmemdup(p, sizeof(*addr), GFP_KERNEL); if (!addr) return -ENOMEM; @@ -2337,6 +2346,22 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, + size_t len) +{ + struct virtnet_info *vi = netdev_priv(dev); + int ret; + + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STANDBY)) + return -EOPNOTSUPP; + + ret = snprintf(buf, len, "sby"); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, @@ -2354,6 +2379,7 @@ static const struct net_device_ops virtnet_netdev = { .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_xdp_flush = virtnet_xdp_flush, .ndo_features_check = passthru_features_check, + .ndo_get_phys_port_name = virtnet_get_phys_port_name, }; static void virtnet_config_changed_work(struct work_struct *work) @@ -2907,10 +2933,16 @@ static int virtnet_probe(struct virtio_device *vdev) virtnet_init_settings(dev); + if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) { + vi->failover = net_failover_create(vi->dev); + if (IS_ERR(vi->failover)) + goto free_vqs; + } + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); - goto free_vqs; + goto free_failover; } virtio_device_ready(vdev); @@ -2947,6 +2979,8 @@ free_unregister_netdev: vi->vdev->config->reset(vdev); unregister_netdev(dev); +free_failover: + net_failover_destroy(vi->failover); free_vqs: cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); @@ -2981,6 +3015,8 @@ static void virtnet_remove(struct virtio_device *vdev) unregister_netdev(vi->dev); + net_failover_destroy(vi->failover); + remove_vq_common(vi); free_netdev(vi->dev);