Merge branch 'hv_netvsc-fix-races-during-shutdown-and-changes'

Stephen Hemminger says:

====================
hv_netvsc: fix races during shutdown and changes

This set of patches fixes issues identified by Vitaly Kuznetsov and
Mohammed Gamal related to state changes in Hyper-v network driver.

A lot of the issues are because setting up the netvsc device requires
a second step (in work queue) to get all the sub-channels running.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-03-22 12:45:10 -04:00
commit 53e82697f7
4 changed files with 204 additions and 183 deletions

View File

@ -212,7 +212,6 @@ void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget);
void rndis_set_subchannel(struct work_struct *w);
bool rndis_filter_opened(const struct netvsc_device *nvdev);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,

View File

@ -90,6 +90,11 @@ static void free_netvsc_device(struct rcu_head *head)
= container_of(head, struct netvsc_device, rcu);
int i;
kfree(nvdev->extension);
vfree(nvdev->recv_buf);
vfree(nvdev->send_buf);
kfree(nvdev->send_section_map);
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
vfree(nvdev->chan_table[i].mrc.slots);
@ -211,12 +216,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
net_device->recv_buf_gpadl_handle = 0;
}
if (net_device->recv_buf) {
/* Free up the receive buffer */
vfree(net_device->recv_buf);
net_device->recv_buf = NULL;
}
if (net_device->send_buf_gpadl_handle) {
ret = vmbus_teardown_gpadl(device->channel,
net_device->send_buf_gpadl_handle);
@ -231,12 +230,6 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
}
net_device->send_buf_gpadl_handle = 0;
}
if (net_device->send_buf) {
/* Free up the send buffer */
vfree(net_device->send_buf);
net_device->send_buf = NULL;
}
kfree(net_device->send_section_map);
}
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
@ -562,26 +555,29 @@ void netvsc_device_remove(struct hv_device *device)
= rtnl_dereference(net_device_ctx->nvdev);
int i;
cancel_work_sync(&net_device->subchan_work);
netvsc_revoke_buf(device, net_device);
RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
/* And disassociate NAPI context from device */
for (i = 0; i < net_device->num_chn; i++)
netif_napi_del(&net_device->chan_table[i].napi);
/*
* At this point, no one should be accessing net_device
* except in here
*/
netdev_dbg(ndev, "net device safe to remove\n");
/* older versions require that buffer be revoked before close */
if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
netvsc_teardown_gpadl(device, net_device);
/* Now, we can close the channel safely */
vmbus_close(device->channel);
netvsc_teardown_gpadl(device, net_device);
/* And dissassociate NAPI context from device */
for (i = 0; i < net_device->num_chn; i++)
netif_napi_del(&net_device->chan_table[i].napi);
if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
netvsc_teardown_gpadl(device, net_device);
/* Release all resources */
free_netvsc_device_rcu(net_device);
@ -645,14 +641,18 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
queue_sends =
atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
if (net_device->destroy && queue_sends == 0)
wake_up(&net_device->wait_drain);
if (unlikely(net_device->destroy)) {
if (queue_sends == 0)
wake_up(&net_device->wait_drain);
} else {
struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
(hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
queue_sends < 1)) {
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
ndev_ctx->eth_stats.wake_queue++;
if (netif_tx_queue_stopped(txq) &&
(hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
queue_sends < 1)) {
netif_tx_wake_queue(txq);
ndev_ctx->eth_stats.wake_queue++;
}
}
}

View File

@ -46,7 +46,10 @@
#include "hyperv_net.h"
#define RING_SIZE_MIN 64
#define RING_SIZE_MIN 64
#define RETRY_US_LO 5000
#define RETRY_US_HI 10000
#define RETRY_MAX 2000 /* >10 sec */
#define LINKCHANGE_INT (2 * HZ)
#define VF_TAKEOVER_INT (HZ / 10)
@ -123,10 +126,8 @@ static int netvsc_open(struct net_device *net)
}
rdev = nvdev->extension;
if (!rdev->link_state) {
if (!rdev->link_state)
netif_carrier_on(net);
netif_tx_wake_all_queues(net);
}
if (vf_netdev) {
/* Setting synthetic device up transparently sets
@ -142,36 +143,25 @@ static int netvsc_open(struct net_device *net)
return 0;
}
static int netvsc_close(struct net_device *net)
static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct net_device *vf_netdev
= rtnl_dereference(net_device_ctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
int ret = 0;
u32 aread, i, msec = 10, retry = 0, retry_max = 20;
struct vmbus_channel *chn;
netif_tx_disable(net);
/* No need to close rndis filter if it is removed already */
if (!nvdev)
goto out;
ret = rndis_filter_close(nvdev);
if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
return ret;
}
unsigned int retry = 0;
int i;
/* Ensure pending bytes in ring are read */
while (true) {
aread = 0;
for (;;) {
u32 aread = 0;
for (i = 0; i < nvdev->num_chn; i++) {
chn = nvdev->chan_table[i].channel;
struct vmbus_channel *chn
= nvdev->chan_table[i].channel;
if (!chn)
continue;
/* make sure receive not running now */
napi_synchronize(&nvdev->chan_table[i].napi);
aread = hv_get_bytes_to_read(&chn->inbound);
if (aread)
break;
@ -181,22 +171,40 @@ static int netvsc_close(struct net_device *net)
break;
}
retry++;
if (retry > retry_max || aread == 0)
break;
if (aread == 0)
return 0;
msleep(msec);
if (++retry > RETRY_MAX)
return -ETIMEDOUT;
if (msec < 1000)
msec *= 2;
usleep_range(RETRY_US_LO, RETRY_US_HI);
}
}
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct net_device *vf_netdev
= rtnl_dereference(net_device_ctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
int ret;
netif_tx_disable(net);
/* No need to close rndis filter if it is removed already */
if (!nvdev)
return 0;
ret = rndis_filter_close(nvdev);
if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
return ret;
}
if (aread) {
ret = netvsc_wait_until_empty(nvdev);
if (ret)
netdev_err(net, "Ring buffer not empty after closing rndis\n");
ret = -ETIMEDOUT;
}
out:
if (vf_netdev)
dev_close(vf_netdev);
@ -845,16 +853,81 @@ static void netvsc_get_channels(struct net_device *net,
}
}
static int netvsc_detach(struct net_device *ndev,
struct netvsc_device *nvdev)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct hv_device *hdev = ndev_ctx->device_ctx;
int ret;
/* Don't try continuing to try and setup sub channels */
if (cancel_work_sync(&nvdev->subchan_work))
nvdev->num_chn = 1;
/* If device was up (receiving) then shutdown */
if (netif_running(ndev)) {
netif_tx_disable(ndev);
ret = rndis_filter_close(nvdev);
if (ret) {
netdev_err(ndev,
"unable to close device (ret %d).\n", ret);
return ret;
}
ret = netvsc_wait_until_empty(nvdev);
if (ret) {
netdev_err(ndev,
"Ring buffer not empty after closing rndis\n");
return ret;
}
}
netif_device_detach(ndev);
rndis_filter_device_remove(hdev, nvdev);
return 0;
}
static int netvsc_attach(struct net_device *ndev,
struct netvsc_device_info *dev_info)
{
struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct hv_device *hdev = ndev_ctx->device_ctx;
struct netvsc_device *nvdev;
struct rndis_device *rdev;
int ret;
nvdev = rndis_filter_device_add(hdev, dev_info);
if (IS_ERR(nvdev))
return PTR_ERR(nvdev);
/* Note: enable and attach happen when sub-channels setup */
netif_carrier_off(ndev);
if (netif_running(ndev)) {
ret = rndis_filter_open(nvdev);
if (ret)
return ret;
rdev = nvdev->extension;
if (!rdev->link_state)
netif_carrier_on(ndev);
}
return 0;
}
static int netvsc_set_channels(struct net_device *net,
struct ethtool_channels *channels)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *dev = net_device_ctx->device_ctx;
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
unsigned int orig, count = channels->combined_count;
struct netvsc_device_info device_info;
bool was_opened;
int ret = 0;
int ret;
/* We do not support separate count for rx, tx, or other */
if (count == 0 ||
@ -871,9 +944,6 @@ static int netvsc_set_channels(struct net_device *net,
return -EINVAL;
orig = nvdev->num_chn;
was_opened = rndis_filter_opened(nvdev);
if (was_opened)
rndis_filter_close(nvdev);
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = count;
@ -882,28 +952,17 @@ static int netvsc_set_channels(struct net_device *net,
device_info.recv_sections = nvdev->recv_section_cnt;
device_info.recv_section_size = nvdev->recv_section_size;
rndis_filter_device_remove(dev, nvdev);
ret = netvsc_detach(net, nvdev);
if (ret)
return ret;
nvdev = rndis_filter_device_add(dev, &device_info);
if (IS_ERR(nvdev)) {
ret = PTR_ERR(nvdev);
ret = netvsc_attach(net, &device_info);
if (ret) {
device_info.num_chn = orig;
nvdev = rndis_filter_device_add(dev, &device_info);
if (IS_ERR(nvdev)) {
netdev_err(net, "restoring channel setting failed: %ld\n",
PTR_ERR(nvdev));
return ret;
}
if (netvsc_attach(net, &device_info))
netdev_err(net, "restoring channel setting failed\n");
}
if (was_opened)
rndis_filter_open(nvdev);
/* We may have missed link change notifications */
net_device_ctx->last_reconfig = 0;
schedule_delayed_work(&net_device_ctx->dwork, 0);
return ret;
}
@ -969,10 +1028,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
struct net_device_context *ndevctx = netdev_priv(ndev);
struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
int orig_mtu = ndev->mtu;
struct netvsc_device_info device_info;
bool was_opened;
int ret = 0;
if (!nvdev || nvdev->destroy)
@ -985,11 +1042,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
return ret;
}
netif_device_detach(ndev);
was_opened = rndis_filter_opened(nvdev);
if (was_opened)
rndis_filter_close(nvdev);
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = nvdev->num_chn;
device_info.send_sections = nvdev->send_section_cnt;
@ -997,35 +1049,27 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
device_info.recv_sections = nvdev->recv_section_cnt;
device_info.recv_section_size = nvdev->recv_section_size;
rndis_filter_device_remove(hdev, nvdev);
ret = netvsc_detach(ndev, nvdev);
if (ret)
goto rollback_vf;
ndev->mtu = mtu;
nvdev = rndis_filter_device_add(hdev, &device_info);
if (IS_ERR(nvdev)) {
ret = PTR_ERR(nvdev);
ret = netvsc_attach(ndev, &device_info);
if (ret)
goto rollback;
/* Attempt rollback to original MTU */
ndev->mtu = orig_mtu;
nvdev = rndis_filter_device_add(hdev, &device_info);
return 0;
if (vf_netdev)
dev_set_mtu(vf_netdev, orig_mtu);
rollback:
/* Attempt rollback to original MTU */
ndev->mtu = orig_mtu;
if (IS_ERR(nvdev)) {
netdev_err(ndev, "restoring mtu failed: %ld\n",
PTR_ERR(nvdev));
return ret;
}
}
if (was_opened)
rndis_filter_open(nvdev);
netif_device_attach(ndev);
/* We may have missed link change notifications */
schedule_delayed_work(&ndevctx->dwork, 0);
if (netvsc_attach(ndev, &device_info))
netdev_err(ndev, "restoring mtu failed\n");
rollback_vf:
if (vf_netdev)
dev_set_mtu(vf_netdev, orig_mtu);
return ret;
}
@ -1531,11 +1575,9 @@ static int netvsc_set_ringparam(struct net_device *ndev,
{
struct net_device_context *ndevctx = netdev_priv(ndev);
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
struct netvsc_device_info device_info;
struct ethtool_ringparam orig;
u32 new_tx, new_rx;
bool was_opened;
int ret = 0;
if (!nvdev || nvdev->destroy)
@ -1560,35 +1602,19 @@ static int netvsc_set_ringparam(struct net_device *ndev,
device_info.recv_sections = new_rx;
device_info.recv_section_size = nvdev->recv_section_size;
netif_device_detach(ndev);
was_opened = rndis_filter_opened(nvdev);
if (was_opened)
rndis_filter_close(nvdev);
rndis_filter_device_remove(hdev, nvdev);
nvdev = rndis_filter_device_add(hdev, &device_info);
if (IS_ERR(nvdev)) {
ret = PTR_ERR(nvdev);
ret = netvsc_detach(ndev, nvdev);
if (ret)
return ret;
ret = netvsc_attach(ndev, &device_info);
if (ret) {
device_info.send_sections = orig.tx_pending;
device_info.recv_sections = orig.rx_pending;
nvdev = rndis_filter_device_add(hdev, &device_info);
if (IS_ERR(nvdev)) {
netdev_err(ndev, "restoring ringparam failed: %ld\n",
PTR_ERR(nvdev));
return ret;
}
if (netvsc_attach(ndev, &device_info))
netdev_err(ndev, "restoring ringparam failed");
}
if (was_opened)
rndis_filter_open(nvdev);
netif_device_attach(ndev);
/* We may have missed link change notifications */
ndevctx->last_reconfig = 0;
schedule_delayed_work(&ndevctx->dwork, 0);
return ret;
}
@ -2072,8 +2098,8 @@ no_net:
static int netvsc_remove(struct hv_device *dev)
{
struct net_device_context *ndev_ctx;
struct net_device *vf_netdev;
struct net_device *net;
struct net_device *vf_netdev, *net;
struct netvsc_device *nvdev;
net = hv_get_drvdata(dev);
if (net == NULL) {
@ -2083,10 +2109,14 @@ static int netvsc_remove(struct hv_device *dev)
ndev_ctx = netdev_priv(net);
netif_device_detach(net);
cancel_delayed_work_sync(&ndev_ctx->dwork);
rcu_read_lock();
nvdev = rcu_dereference(ndev_ctx->nvdev);
if (nvdev)
cancel_work_sync(&nvdev->subchan_work);
/*
* Call to the vsc driver to let it know that the device is being
* removed. Also blocks mtu and channel changes.
@ -2096,11 +2126,13 @@ static int netvsc_remove(struct hv_device *dev)
if (vf_netdev)
netvsc_unregister_vf(vf_netdev);
if (nvdev)
rndis_filter_device_remove(dev, nvdev);
unregister_netdevice(net);
rndis_filter_device_remove(dev,
rtnl_dereference(ndev_ctx->nvdev));
rtnl_unlock();
rcu_read_unlock();
hv_set_drvdata(dev, NULL);

View File

@ -264,13 +264,23 @@ static void rndis_set_link_state(struct rndis_device *rdev,
}
}
static void rndis_filter_receive_response(struct rndis_device *dev,
struct rndis_message *resp)
static void rndis_filter_receive_response(struct net_device *ndev,
struct netvsc_device *nvdev,
const struct rndis_message *resp)
{
struct rndis_device *dev = nvdev->extension;
struct rndis_request *request = NULL;
bool found = false;
unsigned long flags;
struct net_device *ndev = dev->ndev;
/* This should never happen, it means control message
* response received after device removed.
*/
if (dev->state == RNDIS_DEV_UNINITIALIZED) {
netdev_err(ndev,
"got rndis message uninitialized\n");
return;
}
spin_lock_irqsave(&dev->request_lock, flags);
list_for_each_entry(request, &dev->req_list, list_ent) {
@ -352,7 +362,6 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
static int rndis_filter_receive_data(struct net_device *ndev,
struct netvsc_device *nvdev,
struct rndis_device *dev,
struct rndis_message *msg,
struct vmbus_channel *channel,
void *data, u32 data_buflen)
@ -372,7 +381,7 @@ static int rndis_filter_receive_data(struct net_device *ndev,
* should be the data packet size plus the trailer padding size
*/
if (unlikely(data_buflen < rndis_pkt->data_len)) {
netdev_err(dev->ndev, "rndis message buffer "
netdev_err(ndev, "rndis message buffer "
"overflow detected (got %u, min %u)"
"...dropping this message!\n",
data_buflen, rndis_pkt->data_len);
@ -400,35 +409,20 @@ int rndis_filter_receive(struct net_device *ndev,
void *data, u32 buflen)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct rndis_device *rndis_dev = net_dev->extension;
struct rndis_message *rndis_msg = data;
/* Make sure the rndis device state is initialized */
if (unlikely(!rndis_dev)) {
netif_dbg(net_device_ctx, rx_err, ndev,
"got rndis message but no rndis device!\n");
return NVSP_STAT_FAIL;
}
if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
netif_dbg(net_device_ctx, rx_err, ndev,
"got rndis message uninitialized\n");
return NVSP_STAT_FAIL;
}
if (netif_msg_rx_status(net_device_ctx))
dump_rndis_message(ndev, rndis_msg);
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
return rndis_filter_receive_data(ndev, net_dev,
rndis_dev, rndis_msg,
return rndis_filter_receive_data(ndev, net_dev, rndis_msg,
channel, data, buflen);
case RNDIS_MSG_INIT_C:
case RNDIS_MSG_QUERY_C:
case RNDIS_MSG_SET_C:
/* completion msgs */
rndis_filter_receive_response(rndis_dev, rndis_msg);
rndis_filter_receive_response(ndev, net_dev, rndis_msg);
break;
case RNDIS_MSG_INDICATE:
@ -1124,6 +1118,7 @@ void rndis_set_subchannel(struct work_struct *w)
for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
ndev_ctx->tx_table[i] = i % nvdev->num_chn;
netif_device_attach(ndev);
rtnl_unlock();
return;
@ -1134,6 +1129,8 @@ failed:
nvdev->max_chn = 1;
nvdev->num_chn = 1;
netif_device_attach(ndev);
unlock:
rtnl_unlock();
}
@ -1336,6 +1333,10 @@ out:
net_device->num_chn = 1;
}
/* No sub channels, device is ready */
if (net_device->num_chn == 1)
netif_device_attach(net);
return net_device;
err_dev_remv:
@ -1348,16 +1349,12 @@ void rndis_filter_device_remove(struct hv_device *dev,
{
struct rndis_device *rndis_dev = net_dev->extension;
/* Don't try and setup sub channels if about to halt */
cancel_work_sync(&net_dev->subchan_work);
/* Halt and release the rndis device */
rndis_filter_halt_device(rndis_dev);
net_dev->extension = NULL;
netvsc_device_remove(dev);
kfree(rndis_dev);
}
int rndis_filter_open(struct netvsc_device *nvdev)
@ -1375,10 +1372,3 @@ int rndis_filter_close(struct netvsc_device *nvdev)
return rndis_filter_close_device(nvdev->extension);
}
bool rndis_filter_opened(const struct netvsc_device *nvdev)
{
const struct rndis_device *dev = nvdev->extension;
return dev->state == RNDIS_DEV_DATAINITIALIZED;
}