From 867fa150f8f7ee6e9e5a9ab768e2d0dc675a968b Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 14 Mar 2017 10:24:39 -0700 Subject: [PATCH 1/5] ldmvsw: better use of link up and down on ldom vswitch When an ldom VM is bound, the network vswitch infrastructure is set up for it, but was being forced 'UP' by the userland switch configuration script. When 'UP' but not actually connected to a running VM, the ipv6 neighbor probes fail (not a horrible thing) and start cluttering up the kernel logs. Funny thing: these are debug messages that never actually show up, but we do see the net_ratelimited messages that say N callbacks were suppressed. This patch defers the netif_carrier_on() until an actual link has been established with the VM, as indicated by receiving an LDC_EVENT_UP from the underlying LDC protocol. Similarly, we take the link down when we see the LDC_EVENT_RESET. Now when we see the ndo_open(), we reset the link to get things talking again. Orabug: 25525312 Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/ldmvsw.c | 27 +++++++++++++++++++---- drivers/net/ethernet/sun/sunvnet_common.c | 20 ++++++++++++++--- drivers/net/ethernet/sun/sunvnet_common.h | 1 + 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 89952deae47f..5a90fed06260 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -1,6 +1,6 @@ /* ldmvsw.c: Sun4v LDOM Virtual Switch Driver. * - * Copyright (C) 2016 Oracle. All rights reserved. + * Copyright (C) 2016-2017 Oracle. All rights reserved. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -41,8 +41,8 @@ static u8 vsw_port_hwaddr[ETH_ALEN] = {0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; #define DRV_MODULE_NAME "ldmvsw" -#define DRV_MODULE_VERSION "1.1" -#define DRV_MODULE_RELDATE "February 3, 2017" +#define DRV_MODULE_VERSION "1.2" +#define DRV_MODULE_RELDATE "March 4, 2017" static char version[] = DRV_MODULE_NAME " " DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")"; @@ -123,6 +123,20 @@ static void vsw_set_rx_mode(struct net_device *dev) return sunvnet_set_rx_mode_common(dev, port->vp); } +int ldmvsw_open(struct net_device *dev) +{ + struct vnet_port *port = netdev_priv(dev); + struct vio_driver_state *vio = &port->vio; + + /* reset the channel */ + vio_link_state_change(vio, LDC_EVENT_RESET); + vnet_port_reset(port); + vio_port_up(vio); + + return 0; +} +EXPORT_SYMBOL_GPL(ldmvsw_open); + #ifdef CONFIG_NET_POLL_CONTROLLER static void vsw_poll_controller(struct net_device *dev) { @@ -133,7 +147,7 @@ static void vsw_poll_controller(struct net_device *dev) #endif static const struct net_device_ops vsw_ops = { - .ndo_open = sunvnet_open_common, + .ndo_open = ldmvsw_open, .ndo_stop = sunvnet_close_common, .ndo_set_rx_mode = vsw_set_rx_mode, .ndo_set_mac_address = sunvnet_set_mac_addr_common, @@ -365,6 +379,11 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) napi_enable(&port->napi); vio_port_up(&port->vio); + /* assure no carrier until we receive an LDC_EVENT_UP, + * even if the vsw config script tries to force us up + */ + netif_carrier_off(dev); + netdev_info(dev, "LDOM vsw-port %pM\n", dev->dev_addr); pr_info("%s: PORT ( remote-mac %pM%s )\n", dev->name, diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index fa2d11ca9b81..fbd0f1e5210e 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1,7 +1,7 @@ /* sunvnet.c: Sun LDOM Virtual Network Driver. * * Copyright (C) 2007, 2008 David S. Miller - * Copyright (C) 2016 Oracle. All rights reserved. + * Copyright (C) 2016-2017 Oracle. All rights reserved. */ #include @@ -43,7 +43,6 @@ MODULE_LICENSE("GPL"); MODULE_VERSION("1.1"); static int __vnet_tx_trigger(struct vnet_port *port, u32 start); -static void vnet_port_reset(struct vnet_port *port); static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr) { @@ -747,6 +746,13 @@ static int vnet_event_napi(struct vnet_port *port, int budget) /* RESET takes precedent over any other event */ if (port->rx_event & LDC_EVENT_RESET) { + /* a link went down */ + + if (port->vsw == 1) { + netif_tx_stop_all_queues(dev); + netif_carrier_off(dev); + } + vio_link_state_change(vio, LDC_EVENT_RESET); vnet_port_reset(port); vio_port_up(vio); @@ -766,6 +772,13 @@ static int vnet_event_napi(struct vnet_port *port, int budget) } if (port->rx_event & LDC_EVENT_UP) { + /* a link came up */ + + if (port->vsw == 1) { + netif_carrier_on(port->dev); + netif_tx_start_all_queues(port->dev); + } + vio_link_state_change(vio, LDC_EVENT_UP); port->rx_event = 0; return 0; @@ -1631,7 +1644,7 @@ void sunvnet_port_free_tx_bufs_common(struct vnet_port *port) } EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common); -static void vnet_port_reset(struct vnet_port *port) +void vnet_port_reset(struct vnet_port *port) { del_timer(&port->clean_timer); sunvnet_port_free_tx_bufs_common(port); @@ -1639,6 +1652,7 @@ static void vnet_port_reset(struct vnet_port *port) port->tso = (port->vsw == 0); /* no tso in vsw, misbehaves in bridge */ port->tsolen = 0; } +EXPORT_SYMBOL_GPL(vnet_port_reset); static int vnet_port_alloc_tx_ring(struct vnet_port *port) { diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index ce5c824128a3..b21ef4704bd1 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -139,6 +139,7 @@ int sunvnet_handle_attr_common(struct vio_driver_state *vio, void *arg); void sunvnet_handshake_complete_common(struct vio_driver_state *vio); int sunvnet_poll_common(struct napi_struct *napi, int budget); void sunvnet_port_free_tx_bufs_common(struct vnet_port *port); +void vnet_port_reset(struct vnet_port *port); bool sunvnet_port_is_up_common(struct vnet_port *vnet); void sunvnet_port_add_txq_common(struct vnet_port *port); void sunvnet_port_rm_txq_common(struct vnet_port *port); From 0f512c84544b9a8f8de53b6f4bc0c372c45d8693 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 14 Mar 2017 10:24:40 -0700 Subject: [PATCH 2/5] sunvnet: add stats to track ldom to ldom packets and bytes In this driver, there is a "port" created for the connection to each of the other ldoms; a netdev queue is mapped to each port, and they are collected under a single netdev. The generic netdev statistics show us all the traffic in and out of our network device, but don't show individual queue/port stats. This patch breaks out the traffic counts for the individual ports and gives us a little view into the state of those connections. Orabug: 25190537 Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunvnet.c | 116 +++++++++++++++++++++- drivers/net/ethernet/sun/sunvnet_common.c | 6 ++ drivers/net/ethernet/sun/sunvnet_common.h | 15 +++ 3 files changed, 136 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 4cc2571f71c6..0b95105f7060 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1,7 +1,7 @@ /* sunvnet.c: Sun LDOM Virtual Network Driver. * * Copyright (C) 2007, 2008 David S. Miller - * Copyright (C) 2016 Oracle. All rights reserved. + * Copyright (C) 2016-2017 Oracle. All rights reserved. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -77,11 +77,125 @@ static void vnet_set_msglevel(struct net_device *dev, u32 value) vp->msg_enable = value; } +static const struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[] = { + { "rx_packets" }, + { "tx_packets" }, + { "rx_bytes" }, + { "tx_bytes" }, + { "rx_errors" }, + { "tx_errors" }, + { "rx_dropped" }, + { "tx_dropped" }, + { "multicast" }, + { "rx_length_errors" }, + { "rx_frame_errors" }, + { "rx_missed_errors" }, + { "tx_carrier_errors" }, + { "nports" }, +}; + +static int vnet_get_sset_count(struct net_device *dev, int sset) +{ + struct vnet *vp = (struct vnet *)netdev_priv(dev); + + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(ethtool_stats_keys) + + (NUM_VNET_PORT_STATS * vp->nports); + default: + return -EOPNOTSUPP; + } +} + +static void vnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + struct vnet *vp = (struct vnet *)netdev_priv(dev); + struct vnet_port *port; + char *p = (char *)buf; + + switch (stringset) { + case ETH_SS_STATS: + memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); + p += sizeof(ethtool_stats_keys); + + rcu_read_lock(); + list_for_each_entry_rcu(port, &vp->port_list, list) { + snprintf(p, ETH_GSTRING_LEN, "p%u.%s-%pM", + port->q_index, port->switch_port ? "s" : "q", + port->raddr); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "p%u.rx_packets", + port->q_index); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "p%u.tx_packets", + port->q_index); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "p%u.rx_bytes", + port->q_index); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "p%u.tx_bytes", + port->q_index); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "p%u.event_up", + port->q_index); + p += ETH_GSTRING_LEN; + snprintf(p, ETH_GSTRING_LEN, "p%u.event_reset", + port->q_index); + p += ETH_GSTRING_LEN; + } + rcu_read_unlock(); + break; + default: + WARN_ON(1); + break; + } +} + +static void vnet_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *estats, u64 *data) +{ + struct vnet *vp = (struct vnet *)netdev_priv(dev); + struct vnet_port *port; + int i = 0; + + data[i++] = dev->stats.rx_packets; + data[i++] = dev->stats.tx_packets; + data[i++] = dev->stats.rx_bytes; + data[i++] = dev->stats.tx_bytes; + data[i++] = dev->stats.rx_errors; + data[i++] = dev->stats.tx_errors; + data[i++] = dev->stats.rx_dropped; + data[i++] = dev->stats.tx_dropped; + data[i++] = dev->stats.multicast; + data[i++] = dev->stats.rx_length_errors; + data[i++] = dev->stats.rx_frame_errors; + data[i++] = dev->stats.rx_missed_errors; + data[i++] = dev->stats.tx_carrier_errors; + data[i++] = vp->nports; + + rcu_read_lock(); + list_for_each_entry_rcu(port, &vp->port_list, list) { + data[i++] = port->q_index; + data[i++] = port->stats.rx_packets; + data[i++] = port->stats.tx_packets; + data[i++] = port->stats.rx_bytes; + data[i++] = port->stats.tx_bytes; + data[i++] = port->stats.event_up; + data[i++] = port->stats.event_reset; + } + rcu_read_unlock(); +} + static const struct ethtool_ops vnet_ethtool_ops = { .get_drvinfo = vnet_get_drvinfo, .get_msglevel = vnet_get_msglevel, .set_msglevel = vnet_set_msglevel, .get_link = ethtool_op_get_link, + .get_sset_count = vnet_get_sset_count, + .get_strings = vnet_get_strings, + .get_ethtool_stats = vnet_get_ethtool_stats, }; static LIST_HEAD(vnet_list); diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index fbd0f1e5210e..7e95808f8227 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -411,6 +411,8 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) dev->stats.rx_packets++; dev->stats.rx_bytes += len; + port->stats.rx_packets++; + port->stats.rx_bytes += len; napi_gro_receive(&port->napi, skb); return 0; @@ -768,6 +770,7 @@ static int vnet_event_napi(struct vnet_port *port, int budget) maybe_tx_wakeup(port); port->rx_event = 0; + port->stats.event_reset++; return 0; } @@ -781,6 +784,7 @@ static int vnet_event_napi(struct vnet_port *port, int budget) vio_link_state_change(vio, LDC_EVENT_UP); port->rx_event = 0; + port->stats.event_up++; return 0; } @@ -1430,6 +1434,8 @@ ldc_start_done: dev->stats.tx_packets++; dev->stats.tx_bytes += port->tx_bufs[txi].skb->len; + port->stats.tx_packets++; + port->stats.tx_bytes += port->tx_bufs[txi].skb->len; dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); if (unlikely(vnet_tx_dring_avail(dr) < 1)) { diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index b21ef4704bd1..c0fac03cb87a 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -35,6 +35,19 @@ struct vnet_tx_entry { struct vnet; +struct vnet_port_stats { + /* keep them all the same size */ + u32 rx_bytes; + u32 tx_bytes; + u32 rx_packets; + u32 tx_packets; + u32 event_up; + u32 event_reset; + u32 q_placeholder; +}; + +#define NUM_VNET_PORT_STATS (sizeof(struct vnet_port_stats) / sizeof(u32)) + /* Structure to describe a vnet-port or vsw-port in the MD. * If the vsw bit is set, this structure represents a vswitch * port, and the net_device can be found from ->dev. If the @@ -44,6 +57,8 @@ struct vnet; struct vnet_port { struct vio_driver_state vio; + struct vnet_port_stats stats; + struct hlist_node hash; u8 raddr[ETH_ALEN]; unsigned switch_port:1; From e1f1e5f711265ee9d881afd12ff252b2d01e1174 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 14 Mar 2017 10:24:41 -0700 Subject: [PATCH 3/5] sunvnet: track port queues correctly Track our used and unused queue indexies correctly. Otherwise, as ports dropped out and returned, they all eventually ended up with the same queue index. Orabug: 25190537 Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunvnet_common.c | 24 +++++++++++++++++++---- drivers/net/ethernet/sun/sunvnet_common.h | 11 ++--------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 7e95808f8227..f8d1cc7e4977 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1728,11 +1728,25 @@ EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common); void sunvnet_port_add_txq_common(struct vnet_port *port) { struct vnet *vp = port->vp; - int n; + int smallest = 0; + int i; - n = vp->nports++; - n = n & (VNET_MAX_TXQS - 1); - port->q_index = n; + /* find the first least-used q + * When there are more ldoms than q's, we start to + * double up on ports per queue. + */ + for (i = 0; i < VNET_MAX_TXQS; i++) { + if (vp->q_used[i] == 0) { + smallest = i; + break; + } + if (vp->q_used[i] < vp->q_used[smallest]) + smallest = i; + } + + vp->nports++; + vp->q_used[smallest]++; + port->q_index = smallest; netif_tx_wake_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port), port->q_index)); } @@ -1743,5 +1757,7 @@ void sunvnet_port_rm_txq_common(struct vnet_port *port) port->vp->nports--; netif_tx_stop_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port), port->q_index)); + port->vp->q_used[port->q_index]--; + port->q_index = 0; } EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common); diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index c0fac03cb87a..b20d6fa7ef25 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -112,22 +112,15 @@ struct vnet_mcast_entry { }; struct vnet { - /* Protects port_list and port_hash. */ - spinlock_t lock; - + spinlock_t lock; /* Protects port_list and port_hash. */ struct net_device *dev; - u32 msg_enable; - + u8 q_used[VNET_MAX_TXQS]; struct list_head port_list; - struct hlist_head port_hash[VNET_PORT_HASH_SIZE]; - struct vnet_mcast_entry *mcast_list; - struct list_head list; u64 local_mac; - int nports; }; From b12a96f5cd04583f45a1b6554b8f3786b26db913 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 14 Mar 2017 10:24:42 -0700 Subject: [PATCH 4/5] sunvnet: count multicast packets Make sure multicast packets get counted in the device. Orabug: 25190537 Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunvnet_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index f8d1cc7e4977..bf7e0fbe5a9d 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -409,6 +409,8 @@ static int vnet_rx_one(struct vnet_port *port, struct vio_net_desc *desc) skb->ip_summed = port->switch_port ? CHECKSUM_NONE : CHECKSUM_PARTIAL; + if (unlikely(is_multicast_ether_addr(eth_hdr(skb)->h_dest))) + dev->stats.multicast++; dev->stats.rx_packets++; dev->stats.rx_bytes += len; port->stats.rx_packets++; From 9c5a3a1f9388100d4b03e85faf0cce8264985302 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 14 Mar 2017 10:24:43 -0700 Subject: [PATCH 5/5] sunvnet: xoff not needed when removing port link The sunvnet netdev is connected to the controlling ldom's vswitch for network bridging. However, for higher performance between ldoms, there also is a channel between each client ldom. These connections are represented in the sunvnet driver by a queue for each ldom. The driver uses select_queue to tell the stack which queue to use by tracking the mac addresses on the other end of each port. When a connected ldom shuts down, the driver receives an LDC_EVENT_RESET and the port is removed from the driver, thus a queue with no ldom on the other end will never be selected for Tx. The driver was trying to reinforce the "don't use this queue" notion with netif_tx_stop_queue() and netif_tx_wake_queue(), which really should only be used to signal a Tx queue is full (aka XOFF). This misuse of queue state resulted in NETDEV WATCHDOG messages and lots of unnecessary calls into the driver's tx_timeout handler. Simply removing these takes care of the problem. Orabug: 25190537 Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunvnet_common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index bf7e0fbe5a9d..9e86833249d4 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1749,16 +1749,12 @@ void sunvnet_port_add_txq_common(struct vnet_port *port) vp->nports++; vp->q_used[smallest]++; port->q_index = smallest; - netif_tx_wake_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port), - port->q_index)); } EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common); void sunvnet_port_rm_txq_common(struct vnet_port *port) { port->vp->nports--; - netif_tx_stop_queue(netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port), - port->q_index)); port->vp->q_used[port->q_index]--; port->q_index = 0; }