From 6708c9e5cc9bfc7c9a00ce9c0fdd0b1d4952b3d1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 1 May 2013 22:36:49 +0000 Subject: [PATCH 01/25] net: use netdev_features_t in skb_needs_linearize() Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 4040673f806a..40b1fadaf637 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2456,7 +2456,7 @@ EXPORT_SYMBOL(netif_skb_features); * 2. skb is fragmented and the device does not support SG. */ static inline int skb_needs_linearize(struct sk_buff *skb, - int features) + netdev_features_t features) { return skb_is_nonlinear(skb) && ((skb_has_frag_list(skb) && From b29d3145183da4e07d4b570fa8acdd3ac4a5c572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 1 May 2013 23:06:42 +0000 Subject: [PATCH 02/25] net: vlan,ethtool: netdev_features_t is more than 32 bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller --- net/8021q/vlan_dev.c | 2 +- net/core/ethtool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8af508536d36..3a8c8fd63c88 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -628,7 +628,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev, netdev_features_t features) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - u32 old_features = features; + netdev_features_t old_features = features; features &= real_dev->vlan_features; features |= NETIF_F_RXCSUM; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 5a934ef90f8b..22efdaa76ebf 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1421,7 +1421,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - u32 old_features; + netdev_features_t old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; From bbdc42f8147d19f71fe4bc3db8a83e03a21af7a4 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Wed, 1 May 2013 09:37:17 +0000 Subject: [PATCH 03/25] be2net: Fix to use version 2 of cq_create for SkyHawk-R devices SkyHawk-R devices should use v2 of cq_create command. BE3/BE2 devices can use version 0 of the command to create a CQ. Signed-off-by: Ajit Khaparde Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 27 +++++++++++---------- drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 25d3290b8cac..6d848b607427 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -961,19 +961,8 @@ int be_cmd_cq_create(struct be_adapter *adapter, struct be_queue_info *cq, OPCODE_COMMON_CQ_CREATE, sizeof(*req), wrb, NULL); req->num_pages = cpu_to_le16(PAGES_4K_SPANNED(q_mem->va, q_mem->size)); - if (lancer_chip(adapter)) { - req->hdr.version = 2; - req->page_size = 1; /* 1 for 4K */ - AMAP_SET_BITS(struct amap_cq_context_lancer, nodelay, ctxt, - no_delay); - AMAP_SET_BITS(struct amap_cq_context_lancer, count, ctxt, - __ilog2_u32(cq->len/256)); - AMAP_SET_BITS(struct amap_cq_context_lancer, valid, ctxt, 1); - AMAP_SET_BITS(struct amap_cq_context_lancer, eventable, - ctxt, 1); - AMAP_SET_BITS(struct amap_cq_context_lancer, eqid, - ctxt, eq->id); - } else { + + if (BEx_chip(adapter)) { AMAP_SET_BITS(struct amap_cq_context_be, coalescwm, ctxt, coalesce_wm); AMAP_SET_BITS(struct amap_cq_context_be, nodelay, @@ -983,6 +972,18 @@ int be_cmd_cq_create(struct be_adapter *adapter, struct be_queue_info *cq, AMAP_SET_BITS(struct amap_cq_context_be, valid, ctxt, 1); AMAP_SET_BITS(struct amap_cq_context_be, eventable, ctxt, 1); AMAP_SET_BITS(struct amap_cq_context_be, eqid, ctxt, eq->id); + } else { + req->hdr.version = 2; + req->page_size = 1; /* 1 for 4K */ + AMAP_SET_BITS(struct amap_cq_context_v2, nodelay, ctxt, + no_delay); + AMAP_SET_BITS(struct amap_cq_context_v2, count, ctxt, + __ilog2_u32(cq->len/256)); + AMAP_SET_BITS(struct amap_cq_context_v2, valid, ctxt, 1); + AMAP_SET_BITS(struct amap_cq_context_v2, eventable, + ctxt, 1); + AMAP_SET_BITS(struct amap_cq_context_v2, eqid, + ctxt, eq->id); } be_dws_cpu_to_le(ctxt, 
sizeof(req->context)); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index a855668e0cc5..025bdb0d1764 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -381,7 +381,7 @@ struct amap_cq_context_be { u8 rsvd5[32]; /* dword 3*/ } __packed; -struct amap_cq_context_lancer { +struct amap_cq_context_v2 { u8 rsvd0[12]; /* dword 0*/ u8 coalescwm[2]; /* dword 0*/ u8 nodelay; /* dword 0*/ From a6c578ef9b6dc39efd8cd6e68295f63486d20c75 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Wed, 1 May 2013 09:37:27 +0000 Subject: [PATCH 04/25] be2net: Fix to use 32-bit stats to report rx_drops_no_fragment Only BE devices provide 16-bit counter for rx_drops_no_fragment. All other devices provide a 32-bit counter for this stat. Use the 32-bit value where available. Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 23 +++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4babc8a4a543..e60e7f724144 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -471,11 +471,26 @@ static void accumulate_16bit_val(u32 *acc, u16 val) ACCESS_ONCE(*acc) = newacc; } +void populate_erx_stats(struct be_adapter *adapter, + struct be_rx_obj *rxo, + u32 erx_stat) +{ + if (!BEx_chip(adapter)) + rx_stats(rxo)->rx_drops_no_frags = erx_stat; + else + /* below erx HW counter can actually wrap around after + * 65535. 
Driver accumulates a 32-bit value + */ + accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags, + (u16)erx_stat); +} + void be_parse_stats(struct be_adapter *adapter) { struct be_erx_stats_v1 *erx = be_erx_stats_from_cmd(adapter); struct be_rx_obj *rxo; int i; + u32 erx_stat; if (lancer_chip(adapter)) { populate_lancer_stats(adapter); @@ -488,12 +503,8 @@ void be_parse_stats(struct be_adapter *adapter) /* as erx_v1 is longer than v0, ok to use v1 for v0 access */ for_all_rx_queues(adapter, rxo, i) { - /* below erx HW counter can actually wrap around after - * 65535. Driver accumulates a 32-bit value - */ - accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags, - (u16)erx->rx_drops_no_fragments \ - [rxo->q.id]); + erx_stat = erx->rx_drops_no_fragments[rxo->q.id]; + populate_erx_stats(adapter, rxo, erx_stat); } } } From b5adffc472243f6ce5adbeb946bd271c16b6ce2f Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Wed, 1 May 2013 09:38:00 +0000 Subject: [PATCH 05/25] be2net: Fix to show tx priority pause counter in ethtool -S The tx priority pause count is available with the driver. But we were not showing it. This change should fix it and display it in ethtool stats. Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_ethtool.c | 1 + drivers/net/ethernet/emulex/benet/be_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 5733cde88e2c..3d4461adb3b4 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -85,6 +85,7 @@ static const struct be_ethtool_stat et_stats[] = { {DRVSTAT_INFO(tx_pauseframes)}, {DRVSTAT_INFO(tx_controlframes)}, {DRVSTAT_INFO(rx_priority_pause_frames)}, + {DRVSTAT_INFO(tx_priority_pauseframes)}, /* Received packets dropped when an internal fifo going into * main packet buffer tank (PMEM) overflows. 
*/ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e60e7f724144..591a8675a13d 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -410,6 +410,7 @@ static void populate_be_v1_stats(struct be_adapter *adapter) drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop; drvs->tx_pauseframes = port_stats->tx_pauseframes; drvs->tx_controlframes = port_stats->tx_controlframes; + drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes; drvs->jabber_events = port_stats->jabber_events; drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf; drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr; From c5dae588fd6d5c6eceaf25fda4a78698d7ad888c Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Wed, 1 May 2013 09:38:24 +0000 Subject: [PATCH 06/25] be2net: Fix to receive Multicast Packets when Promiscuous mode is enabled on certain devices When a user requests Promiscuous mode on SkyHawk-R devices, the FW enables Desparate Promiscuous mode. Due to this, we need to explicitly enable Multicast Promiscuous mode so that Multicast packets are received as well. This patch fixes that. Signed-off-by: Ajit Khaparde Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 6d848b607427..180c3b79f0b9 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1764,10 +1764,12 @@ int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) req->if_id = cpu_to_le32(adapter->if_handle); if (flags & IFF_PROMISC) { req->if_flags_mask = cpu_to_le32(BE_IF_FLAGS_PROMISCUOUS | - BE_IF_FLAGS_VLAN_PROMISCUOUS); + BE_IF_FLAGS_VLAN_PROMISCUOUS | + BE_IF_FLAGS_MCAST_PROMISCUOUS); if (value == ON) req->if_flags = cpu_to_le32(BE_IF_FLAGS_PROMISCUOUS | - BE_IF_FLAGS_VLAN_PROMISCUOUS); + BE_IF_FLAGS_VLAN_PROMISCUOUS | + BE_IF_FLAGS_MCAST_PROMISCUOUS); } else if (flags & IFF_ALLMULTI) { req->if_flags_mask = req->if_flags = cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS); From 701962d0bbc74cfe60a93389a3394ee358362a71 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 2 May 2013 03:36:34 +0000 Subject: [PATCH 07/25] be2net: Fix firmware download for Lancer Increasing the timeout value of write_object command to 60 seconds as 30 second timeout was found to be not enough for the command to complete. Signed-off-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 180c3b79f0b9..e1e5bb9d9054 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -2087,7 +2087,7 @@ int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd, spin_unlock_bh(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->flash_compl, - msecs_to_jiffies(30000))) + msecs_to_jiffies(60000))) status = -1; else status = adapter->flash_status; From 04d3d6247fa39d4ee6488f1bd5314879ab9d5c9d Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 2 May 2013 03:36:55 +0000 Subject: [PATCH 08/25] be2net: avoid napi_disable() when it has not been enabled When RQ creation fails in be_open(), driver jumps to be_close() where napi_disable() is done without a prior napi_enable(); leading to a hang. This change is needed as there is no way to see if napi is enabled/disabled. Signed-off-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be.h | 1 + drivers/net/ethernet/emulex/benet/be_main.c | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 234ce6f07544..f544b297c9ab 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -327,6 +327,7 @@ enum vf_state { #define BE_FLAGS_LINK_STATUS_INIT 1 #define BE_FLAGS_WORKER_SCHEDULED (1 << 3) +#define BE_FLAGS_NAPI_ENABLED (1 << 9) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 #define BE_FLAGS_QNQ_ASYNC_EVT_RCVD (1 << 11) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 591a8675a13d..e6b5c4cd8c0c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2548,8 +2548,11 @@ static int be_close(struct net_device *netdev) be_roce_dev_close(adapter); - for_all_evt_queues(adapter, eqo, i) - napi_disable(&eqo->napi); + if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { + for_all_evt_queues(adapter, eqo, i) + napi_disable(&eqo->napi); + adapter->flags &= ~BE_FLAGS_NAPI_ENABLED; + } be_async_mcc_disable(adapter); @@ -2657,6 +2660,7 @@ static int be_open(struct net_device *netdev) napi_enable(&eqo->napi); be_eq_notify(adapter, eqo->q.id, true, false, 0); } + adapter->flags |= BE_FLAGS_NAPI_ENABLED; status = be_cmd_link_status_query(adapter, NULL, &link_status, 0); if (!status) From c2bba3dfc16e625e73f986a21110cb784cddd30a Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 2 May 2013 03:37:08 +0000 Subject: [PATCH 09/25] be2net: Fix to fail probe if MSI-X enable fails for a VF As per SPEC, INTx mode is not supported on VFs. So if enable_msix fails, then just fail probe. Also bail out of be_open if irq_register fails. Signed-off-by: Somnath Kotur Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 22 ++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e6b5c4cd8c0c..6c52a60dcdb7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2390,7 +2390,7 @@ static uint be_num_rss_want(struct be_adapter *adapter) return num; } -static void be_msix_enable(struct be_adapter *adapter) +static int be_msix_enable(struct be_adapter *adapter) { #define BE_MIN_MSIX_VECTORS 1 int i, status, num_vec, num_roce_vec = 0; @@ -2415,13 +2415,17 @@ static void be_msix_enable(struct be_adapter *adapter) goto done; } else if (status >= BE_MIN_MSIX_VECTORS) { num_vec = status; - if (pci_enable_msix(adapter->pdev, adapter->msix_entries, - num_vec) == 0) + status = pci_enable_msix(adapter->pdev, adapter->msix_entries, + num_vec); + if (!status) goto done; } dev_warn(dev, "MSIx enable failed\n"); - return; + /* INTx is not supported in VFs, so fail probe if enable_msix fails */ + if (!be_physfn(adapter)) + return status; + return 0; done: if (be_roce_supported(adapter)) { if (num_vec > num_roce_vec) { @@ -2435,7 +2439,7 @@ done: } else adapter->num_msix_vec = num_vec; dev_info(dev, "enabled %d MSI-x vector(s)\n", adapter->num_msix_vec); - return; + return 0; } static inline int be_msix_vec_get(struct be_adapter *adapter, @@ -2646,7 +2650,9 @@ static int be_open(struct net_device *netdev) if (status) goto err; - be_irq_register(adapter); + status = be_irq_register(adapter); + if (status) + goto err; for_all_rx_queues(adapter, rxo, i) be_cq_notify(adapter, rxo->cq.id, true, 0); @@ -3116,7 +3122,9 @@ static int be_setup(struct be_adapter *adapter) if (status) goto err; - be_msix_enable(adapter); + status = be_msix_enable(adapter); + if (status) + goto err; status = be_evt_queues_create(adapter); if (status) From ac69c26e7accb04ae2cb9ab0872068983a42b3c8 Mon 
Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 2 May 2013 00:43:57 +0000 Subject: [PATCH 10/25] xen-netback: remove redundent parameter in netbk_count_requests Tracking down from the caller, first_idx is always equal to vif->tx.req_cons. Remove it to avoid confusion. Suggested-by: Jan Beulich Signed-off-by: Wei Liu Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index a2865f17c667..c44772d6bba0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -928,7 +928,6 @@ static void netbk_fatal_tx_err(struct xenvif *vif) static int netbk_count_requests(struct xenvif *vif, struct xen_netif_tx_request *first, - RING_IDX first_idx, struct xen_netif_tx_request *txp, int work_to_do) { @@ -1005,7 +1004,7 @@ static int netbk_count_requests(struct xenvif *vif, } while ((txp++)->flags & XEN_NETTXF_more_data); if (drop_err) { - netbk_tx_err(vif, first, first_idx + slots); + netbk_tx_err(vif, first, cons + slots); return drop_err; } @@ -1470,8 +1469,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) continue; } - ret = netbk_count_requests(vif, &txreq, idx, - txfrags, work_to_do); + ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do); if (unlikely(ret < 0)) continue; From 59ccb4ebbc35e36a3c143f2d1355deb75c2e628f Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 2 May 2013 00:43:58 +0000 Subject: [PATCH 11/25] xen-netback: avoid allocating variable size array on stack Tune xen_netbk_count_requests to not touch working array beyond limit, so that we can make working array size constant. Suggested-by: Jan Beulich Signed-off-by: Wei Liu Signed-off-by: David S. 
Miller --- drivers/net/xen-netback/netback.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index c44772d6bba0..ce8109f7d56f 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -934,11 +934,14 @@ static int netbk_count_requests(struct xenvif *vif, RING_IDX cons = vif->tx.req_cons; int slots = 0; int drop_err = 0; + int more_data; if (!(first->flags & XEN_NETTXF_more_data)) return 0; do { + struct xen_netif_tx_request dropped_tx = { 0 }; + if (slots >= work_to_do) { netdev_err(vif->dev, "Asked for %d slots but exceeds this limit\n", @@ -972,6 +975,9 @@ static int netbk_count_requests(struct xenvif *vif, drop_err = -E2BIG; } + if (drop_err) + txp = &dropped_tx; + memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), sizeof(*txp)); @@ -1001,7 +1007,13 @@ static int netbk_count_requests(struct xenvif *vif, netbk_fatal_tx_err(vif); return -EINVAL; } - } while ((txp++)->flags & XEN_NETTXF_more_data); + + more_data = txp->flags & XEN_NETTXF_more_data; + + if (!drop_err) + txp++; + + } while (more_data); if (drop_err) { netbk_tx_err(vif, first, cons + slots); @@ -1408,7 +1420,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) !list_empty(&netbk->net_schedule_list)) { struct xenvif *vif; struct xen_netif_tx_request txreq; - struct xen_netif_tx_request txfrags[max_skb_slots]; + struct xen_netif_tx_request txfrags[XEN_NETIF_NR_SLOTS_MIN]; struct page *page; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; u16 pending_idx; From 376414945d15aa636e65f7e773c1e398b7a21cb9 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 2 May 2013 00:43:59 +0000 Subject: [PATCH 12/25] xen-netback: better names for thresholds This patch only changes some names to avoid confusion. 
In this patch we have: MAX_SKB_SLOTS_DEFAULT -> FATAL_SKB_SLOTS_DEFAULT max_skb_slots -> fatal_skb_slots #define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN The fatal_skb_slots is the threshold to determine whether a packet is malicious. XEN_NETBK_LEGACY_SLOTS_MAX is the maximum slots a valid packet can have at this point. It is defined to be XEN_NETIF_NR_SLOTS_MIN because that's guaranteed to be supported by all backends. Suggested-by: Ian Campbell Signed-off-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 49 ++++++++++++++++++------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index ce8109f7d56f..37984e6d4e99 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -51,9 +51,17 @@ * This is the maximum slots a skb can have. If a guest sends a skb * which exceeds this limit it is considered malicious. */ -#define MAX_SKB_SLOTS_DEFAULT 20 -static unsigned int max_skb_slots = MAX_SKB_SLOTS_DEFAULT; -module_param(max_skb_slots, uint, 0444); +#define FATAL_SKB_SLOTS_DEFAULT 20 +static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; +module_param(fatal_skb_slots, uint, 0444); + +/* + * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating + * the maximum slots a valid packet can use. Now this value is defined + * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by + * all backend. + */ +#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN typedef unsigned int pending_ring_idx_t; #define INVALID_PENDING_RING_IDX (~0U) @@ -953,25 +961,26 @@ static int netbk_count_requests(struct xenvif *vif, /* This guest is really using too many slots and * considered malicious. 
*/ - if (unlikely(slots >= max_skb_slots)) { + if (unlikely(slots >= fatal_skb_slots)) { netdev_err(vif->dev, "Malicious frontend using %d slots, threshold %u\n", - slots, max_skb_slots); + slots, fatal_skb_slots); netbk_fatal_tx_err(vif); return -E2BIG; } /* Xen network protocol had implicit dependency on - * MAX_SKB_FRAGS. XEN_NETIF_NR_SLOTS_MIN is set to the - * historical MAX_SKB_FRAGS value 18 to honor the same - * behavior as before. Any packet using more than 18 - * slots but less than max_skb_slots slots is dropped + * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to + * the historical MAX_SKB_FRAGS value 18 to honor the + * same behavior as before. Any packet using more than + * 18 slots but less than fatal_skb_slots slots is + * dropped */ - if (!drop_err && slots >= XEN_NETIF_NR_SLOTS_MIN) { + if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { if (net_ratelimit()) netdev_dbg(vif->dev, "Too many slots (%d) exceeding limit (%d), dropping packet\n", - slots, XEN_NETIF_NR_SLOTS_MIN); + slots, XEN_NETBK_LEGACY_SLOTS_MAX); drop_err = -E2BIG; } @@ -1053,7 +1062,7 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, struct pending_tx_info *first = NULL; /* At this point shinfo->nr_frags is in fact the number of - * slots, which can be as large as XEN_NETIF_NR_SLOTS_MIN. + * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. 
*/ nr_slots = shinfo->nr_frags; @@ -1415,12 +1424,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) struct sk_buff *skb; int ret; - while ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX < MAX_PENDING_REQS) && !list_empty(&netbk->net_schedule_list)) { struct xenvif *vif; struct xen_netif_tx_request txreq; - struct xen_netif_tx_request txfrags[XEN_NETIF_NR_SLOTS_MIN]; + struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; struct page *page; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; u16 pending_idx; @@ -1508,7 +1517,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) pending_idx = netbk->pending_ring[index]; data_len = (txreq.size > PKT_PROT_LEN && - ret < XEN_NETIF_NR_SLOTS_MIN) ? + ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, @@ -1787,7 +1796,7 @@ static inline int rx_work_todo(struct xen_netbk *netbk) static inline int tx_work_todo(struct xen_netbk *netbk) { - if ((nr_pending_reqs(netbk) + XEN_NETIF_NR_SLOTS_MIN + if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX < MAX_PENDING_REQS) && !list_empty(&netbk->net_schedule_list)) return 1; @@ -1872,11 +1881,11 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; - if (max_skb_slots < XEN_NETIF_NR_SLOTS_MIN) { + if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { printk(KERN_INFO - "xen-netback: max_skb_slots too small (%d), bump it to XEN_NETIF_NR_SLOTS_MIN (%d)\n", - max_skb_slots, XEN_NETIF_NR_SLOTS_MIN); - max_skb_slots = XEN_NETIF_NR_SLOTS_MIN; + "xen-netback: fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", + fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); + fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; } xen_netbk_group_nr = num_online_cpus(); From af5c6df704af46f2cfebea329887f3d70ccb7b3d Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Thu, 2 May 
2013 01:52:11 +0000 Subject: [PATCH 13/25] drivers: net: cpsw: irq not disabled in cpsw isr in particular sequence In CPSW NAPI, after processing all interrupts, the IRQ is enabled and then the bookkeeping flag irq_enabled is updated. In random cases, when a packet is transmitted or received between processing packets and enabling the IRQ, the ISR is called just after the IRQ is enabled and before irq_enabled is updated, so IRQs are not disabled as irq_enabled is still false and the CPU gets locked in the CPSW ISR. Changing the sequence to update irq_enabled first and then enable the IRQ fixes the issue. This issue is not always observed, as it depends on timing: whether a Tx or Rx IRQ is invoked between packet processing and enabling the IRQ. Cc: Sebastian Siewior Signed-off-by: Mugunthan V N Acked-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 59c43918883e..21a5b291b4b3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -555,8 +555,8 @@ static int cpsw_poll(struct napi_struct *napi, int budget) cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { - cpsw_enable_irq(priv); prim_cpsw->irq_enabled = true; + cpsw_enable_irq(priv); } } From e616071094c214a274fb66d0b297f8b25a1a34d7 Mon Sep 17 00:00:00 2001 From: Gerlando Falauto Date: Wed, 1 May 2013 12:04:44 +0000 Subject: [PATCH 14/25] tipc: cosmetic: clean up comments and break a long line Signed-off-by: Gerlando Falauto Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 25e159c2feb4..0e2f4324f542 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -584,8 +584,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, { int bp_index; - /* - * Prepare broadcast link message for reliable transmission, + /* Prepare broadcast link message for reliable transmission, * if first time trying to send it; * preparation is skipped for broadcast link protocol messages * since they are sent in an unreliable manner and don't need it @@ -613,11 +612,12 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; if (!p) - break; /* no more bearers to try */ + break; /* No more bearers to try */ - tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); + tipc_nmap_diff(&bcbearer->remains, &p->nodes, + &bcbearer->remains_new); if (bcbearer->remains_new.count == bcbearer->remains.count) - continue; /* bearer pair doesn't add anything */ + continue; /* Nothing added by bearer pair */ if (!tipc_bearer_blocked(p)) tipc_bearer_send(p, buf, &p->bcast_addr); @@ -628,13 +628,14 @@ static int tipc_bcbearer_send(struct sk_buff *buf, /* unable to send on either bearer */ continue; + /* Swap bearers for next packet */ if (s) { bcbearer->bpairs[bp_index].primary = s; bcbearer->bpairs[bp_index].secondary = p; } if (bcbearer->remains_new.count == 0) - break; /* all targets reached */ + break; /* All targets reached */ bcbearer->remains = bcbearer->remains_new; } From 77861d9c00900c0105b9d66ecf9fa612a43f8df5 Mon Sep 17 00:00:00 2001 From: Gerlando Falauto Date: Wed, 1 May 2013 12:04:45 +0000 Subject: [PATCH 15/25] tipc: tipc_bcbearer_send(): simplify bearer selection Signed-off-by: Gerlando Falauto Signed-off-by: David S. 
Miller --- net/tipc/bcast.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 0e2f4324f542..d9d848d488ee 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -610,23 +610,23 @@ static int tipc_bcbearer_send(struct sk_buff *buf, for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; + struct tipc_bearer *b = p; if (!p) break; /* No more bearers to try */ - tipc_nmap_diff(&bcbearer->remains, &p->nodes, + if (tipc_bearer_blocked(p)) { + if (!s || tipc_bearer_blocked(s)) + continue; /* Can't use either bearer */ + b = s; + } + + tipc_nmap_diff(&bcbearer->remains, &b->nodes, &bcbearer->remains_new); if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* Nothing added by bearer pair */ - if (!tipc_bearer_blocked(p)) - tipc_bearer_send(p, buf, &p->bcast_addr); - else if (s && !tipc_bearer_blocked(s)) - /* unable to send on primary bearer */ - tipc_bearer_send(s, buf, &s->bcast_addr); - else - /* unable to send on either bearer */ - continue; + tipc_bearer_send(b, buf, &b->bcast_addr); /* Swap bearers for next packet */ if (s) { From 488fc9af8267d0cd9036bc9db9f5dbbfde6de208 Mon Sep 17 00:00:00 2001 From: Gerlando Falauto Date: Wed, 1 May 2013 12:04:46 +0000 Subject: [PATCH 16/25] tipc: pskb_copy() buffers when sending on more than one bearer When sending packets, TIPC bearers use skb_clone() before writing their hardware header. This will however NOT copy the data buffer. So when the same packet is sent over multiple bearers (to reach multiple nodes), the same socket buffer data will be treated by multiple tipc_media drivers which will write their own hardware header through dev_hard_header(). Most of the time this is not a problem, because by the time the packet is processed by the second media, it has already been sent over the first one. 
However, when the first transmission is delayed (e.g. because of insufficient bandwidth or through a shaper), the next bearer will overwrite the hardware header, resulting in the packet being sent: a) with the wrong source address, when bearers of the same type, e.g. ethernet, are involved b) with a completely corrupt header, or even dropped, when bearers of different types are involved. So when the same socket buffer is to be sent multiple times, send a pskb_copy() instead (from the second instance on), and release it afterwards (the bearer will skb_clone() it anyway). Signed-off-by: Gerlando Falauto Signed-off-by: David S. Miller --- net/tipc/bcast.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index d9d848d488ee..e5f3da507823 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -611,6 +611,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; struct tipc_bearer *b = p; + struct sk_buff *tbuf; if (!p) break; /* No more bearers to try */ @@ -626,7 +627,17 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* Nothing added by bearer pair */ - tipc_bearer_send(b, buf, &b->bcast_addr); + if (bp_index == 0) { + /* Use original buffer for first bearer */ + tipc_bearer_send(b, buf, &b->bcast_addr); + } else { + /* Avoid concurrent buffer access */ + tbuf = pskb_copy(buf, GFP_ATOMIC); + if (!tbuf) + break; + tipc_bearer_send(b, tbuf, &b->bcast_addr); + kfree_skb(tbuf); /* Bearer keeps a clone */ + } /* Swap bearers for next packet */ if (s) { From 83401eb4990ff6af55aeed8f49681558544192e6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 2 May 2013 14:23:28 +0000 Subject: [PATCH 17/25] bridge: fix race with topology change timer A bridge should only send topology change notice if it is not the 
root bridge. It is possible for message age timer to elect itself as a new root bridge, and still have a topology change timer running but waiting for bridge lock on other CPU. Solve the race by checking if we are root bridge before continuing. This was the root cause of the cases where br_send_tcn_bpdu would OOPS. Reported-by: JerryKang Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_stp_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index c3530a81a33b..950663d4d330 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -107,7 +107,7 @@ static void br_tcn_timer_expired(unsigned long arg) br_debug(br, "tcn timer expired\n"); spin_lock(&br->lock); - if (br->dev->flags & IFF_UP) { + if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) { br_transmit_tcn(br); mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); From 9b3eb5edf33897dc9128aa27300066153d4f8b9c Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 2 May 2013 16:14:19 +0000 Subject: [PATCH 18/25] gre: Fix GREv4 TCPv6 segmentation. For ipv6 traffic, GRE can generate packet with strange GSO bits, e.g. ipv4 packet with SKB_GSO_TCPV6 flag set. Therefore following patch relaxes check in inet gso handler to allow such packet for segmentation. This patch also fixes wrong skb->protocol set that was done in gre_gso_segment() handler. Reported-by: Steinar H. Gunderson CC: Eric Dumazet Signed-off-by: Pravin B Shelar Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/af_inet.c | 1 + net/ipv4/gre.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c61b3bb87a16..d01be2a3ae53 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1293,6 +1293,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | 0))) goto out; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index d2d5a99fba09..cc22363965d2 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -121,6 +121,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, int ghl = GRE_HEADER_SECTION; struct gre_base_hdr *greh; int mac_len = skb->mac_len; + __be16 protocol = skb->protocol; int tnl_hlen; bool csum; @@ -150,7 +151,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, /* setup inner skb. */ if (greh->protocol == htons(ETH_P_TEB)) { - struct ethhdr *eth = eth_hdr(skb); + struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb); skb->protocol = eth->h_proto; } else { skb->protocol = greh->protocol; @@ -199,6 +200,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); skb->mac_len = mac_len; + skb->protocol = protocol; } while ((skb = skb->next)); out: return segs; From 0d05535d413322341f89c77f32936b4df296ac5a Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 2 May 2013 16:17:02 +0000 Subject: [PATCH 19/25] vxlan: Fix TCPv6 segmentation. This patch sets the correct skb->protocol so that the inner packet can look up the correct gso handler. Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6abbe6455129..0ae038a4c7a8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2311,8 +2311,10 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - int outer_hlen; + struct ethhdr *inner_eth = (struct ethhdr *)skb_inner_mac_header(skb); + __be16 protocol = skb->protocol; netdev_features_t enc_features; + int outer_hlen; if (unlikely(!pskb_may_pull(skb, tnl_hlen))) goto out; @@ -2322,6 +2324,8 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); + inner_eth = (struct ethhdr *)skb_mac_header(skb); + skb->protocol = inner_eth->h_proto; /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); @@ -2358,6 +2362,7 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, } skb->ip_summed = CHECKSUM_NONE; + skb->protocol = protocol; } while ((skb = skb->next)); out: return segs; From 3d64fc705318e09d2c8544cec7169e45837a33dc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 2 May 2013 20:44:20 +0000 Subject: [PATCH 20/25] usbnet: pegasus: endian bug in write_mii_word() We're only passing the two high bytes of an integer. It works for little endian but not for big endian. Signed-off-by: Dan Carpenter Signed-off-by: David S. 
Miller --- drivers/net/usb/pegasus.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 09699054b54f..03e8a15d7deb 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -256,8 +256,9 @@ static int mdio_read(struct net_device *dev, int phy_id, int loc) static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) { pegasus_t *pegasus = netdev_priv(dev); + u16 data = val; - write_mii_word(pegasus, phy_id, loc, (__u16 *)&val); + write_mii_word(pegasus, phy_id, loc, &data); } static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) From 0decc64b181ddf2eedb0d6f4423c2dedd7d49897 Mon Sep 17 00:00:00 2001 From: Teppo Kotilainen Date: Thu, 2 May 2013 23:05:13 +0000 Subject: [PATCH 21/25] net: qmi_wwan: Add Telewell TW-LTE 4G MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Information from driver description files: diag: VID_19D2&PID_0412&MI_00 nmea: VID_19D2&PID_0412&MI_01 at: VID_19D2&PID_0412&MI_02 modem: VID_19D2&PID_0412&MI_03 net: VID_19D2&PID_0412&MI_04 Signed-off-by: Teppo Kotilainen Acked-by: Bjørn Mork Signed-off-by: David S. 
Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5a88e72090ce..834e405fb57a 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -548,6 +548,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x0265, 4)}, /* ONDA MT8205 4G LTE */ {QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ + {QMI_FIXED_INTF(0x19d2, 0x0412, 4)}, /* Telewell TW-LTE 4G */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ {QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */ {QMI_FIXED_INTF(0x19d2, 0x1012, 4)}, From c5060cec6ba27ad3f0e7facfdf05d2f18e3e3010 Mon Sep 17 00:00:00 2001 From: "holger@eitzenberger.org" Date: Fri, 3 May 2013 00:02:20 +0000 Subject: [PATCH 22/25] asix: fix BUG in receive path when lowering MTU There is a bug in the receive path of the asix driver at the time a packet is received larger than MTU size and DF bit set: BUG: unable to handle kernel paging request at 0000004000000001 IP: [] skb_release_head_state+0x2d/0xd2 ... Call Trace: [] ? skb_release_all+0x9/0x1e [] ? __kfree_skb+0x9/0x6f [] ? asix_rx_fixup_internal+0xff/0x1ae [asix] [] ? usbnet_bh+0x4f/0x226 [usbnet] ... It is easily reproducible by setting an MTU of 512 e. g. and sending something like ping -s 1472 -c 1 -M do $SELF from another box. And this is because the rx->ax_skb is freed on error, but rx->ax_skb is not reset, and the size is not reset to zero in this case. And since the skb is added again to the usbnet->done skb queue it is accessing already freed memory, resulting in the BUG when freeing a 2nd time. I therefore think the value 0x0000004000000001 shown in the trace is more or less random data. Signed-off-by: Holger Eitzenberger Signed-off-by: David S. 
Miller --- drivers/net/usb/asix_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index f7f623a5390e..577c72d5f369 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -100,6 +100,9 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, netdev_err(dev->net, "asix_rx_fixup() Bad RX Length %d\n", rx->size); kfree_skb(rx->ax_skb); + rx->ax_skb = NULL; + rx->size = 0U; + return 0; } From 8da3056c04bfc5f69f840ab038a38389e2de8189 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 3 May 2013 02:57:00 +0000 Subject: [PATCH 23/25] packet: tpacket_v3: do not trigger bug() on wrong header status Jakub reported that it is fairly easy to trigger the BUG() macro from user space with TPACKET_V3's RX_RING by just giving a wrong header status flag. We already had a similar situation in commit 7f5c3e3a80e6654 (``af_packet: remove BUG statement in tpacket_destruct_skb'') where this was the case in the TX_RING side that could be triggered from user space. So really, don't use BUG() or BUG_ON() unless there's really no way out, and i.e. don't use it for consistency checking when there's user space involved, no excuses, especially not if you're slapping the user with WARN + dump_stack + BUG all at once. The two functions are of concern: prb_retire_current_block() [when block status != TP_STATUS_KERNEL] prb_open_block() [when block_status != TP_STATUS_KERNEL] Calls to prb_open_block() are guarded by earlier checks if block_status is really TP_STATUS_KERNEL (racy!), but the first one BUG() is easily triggerable from user space. The system still behaves stably after they are removed. Also remove that yoda condition entirely, since it's already guarded. Reported-by: Jakub Zawadzki Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- net/packet/af_packet.c | 53 ++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index dd5cd49b0e09..8ec1bca7f859 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -742,36 +742,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1, smp_rmb(); - if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ - /* We could have just memset this but we will lose the - * flexibility of making the priv area sticky - */ - BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; - BLOCK_NUM_PKTS(pbd1) = 0; - BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - getnstimeofday(&ts); - h1->ts_first_pkt.ts_sec = ts.tv_sec; - h1->ts_first_pkt.ts_nsec = ts.tv_nsec; - pkc1->pkblk_start = (char *)pbd1; - pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; - pbd1->version = pkc1->version; - pkc1->prev = pkc1->nxt_offset; - pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; - prb_thaw_queue(pkc1); - _prb_refresh_rx_retire_blk_timer(pkc1); + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - smp_wmb(); + getnstimeofday(&ts); - return; - } + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; - WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", - pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); - dump_stack(); - BUG(); + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + 
pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; + + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); + + smp_wmb(); } /* @@ -862,10 +859,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, prb_close_block(pkc, pbd, po, status); return; } - - WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); - dump_stack(); - BUG(); } static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, From 88dccf5b344163645d694d74cffc5a7e64a385bb Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Fri, 3 May 2013 04:22:04 +0000 Subject: [PATCH 24/25] sky2: Fix crash on receiving VLAN frames After recent 86a9bad3 (net: vlan: add protocol argument to packet tagging functions) my sky2 started to crash on receive of tagged frames, with backtrace similar to #CRASH!!! vlan_do_receive __netif_receive_skb_core __netif_receive_skb netif_receive_skb sky2_poll ... __net_rx_action __do_softirq The problem turned out to be: 1) sky2 copies small packets from ring on RX, and in its receive_copy() skb header is copied manually field, by field, and only for some fields; 2) 86a9bad3 added skb->vlan_proto, which vlan_untag() or __vlan_hwaccel_put_tag() set, and which is later used in vlan_do_receive(). That patch updated copy_skb_header() for newly introduced skb->vlan_proto, but overlooked the need to also copy it in sky2's receive_copy(). Because of 2, we have the following scenario: - frame is received and tagged in a ring, by sky2_rx_tag(). Both skb->vlan_proto and skb->vlan_tci are set; - later skb is decided to be copied, but skb->vlan_proto is forgotten and becomes 0. - in the beginning of vlan_do_receive() we call __be16 vlan_proto = skb->vlan_proto; vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id); which eventually invokes vlan_proto_idx(vlan_proto) and that routine BUGs for everything except ETH_P_8021Q and ETH_P_8021AD. Oops. Fix it. P.S. Stephen, I wonder, why copy_skb_header() is not used in sky2.c::receive_copy() ? 
Problems, where receive_copy was updated field by field showed several times already, e.g. 3f42941b (sky2: propogate rx hash when packet is copied) e072b3fa (sky2: fix receive length error in mixed non-VLAN/VLAN traffic) Cc: Patrick McHardy Cc: Stephen Hemminger Cc: Mirko Lindner Signed-off-by: Kirill Smelkov Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 256ae789c143..d175bbd3ffd3 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2496,10 +2496,12 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2, skb->ip_summed = re->skb->ip_summed; skb->csum = re->skb->csum; skb->rxhash = re->skb->rxhash; + skb->vlan_proto = re->skb->vlan_proto; skb->vlan_tci = re->skb->vlan_tci; pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr, length, PCI_DMA_FROMDEVICE); + re->skb->vlan_proto = 0; re->skb->vlan_tci = 0; re->skb->rxhash = 0; re->skb->ip_summed = CHECKSUM_NONE; From 777c2300865cb9b1b1791862ed23da677abfe6dc Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 3 May 2013 08:11:04 +0000 Subject: [PATCH 25/25] cxgb4: fix error recovery when t4_fw_hello returns a positive value Since commit 636f9d371f70f22961fd598fe18380057518ca31 ("cxgb4: Add support for T4 configuration file"), t4_fw_hello may return a positive value instead of 0 for success. The recovery code tests only for zero and fails recovery for any other value. This fix tests for negative error values and fails only on those cases. Error recovery after an error injection works after this change. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c59ec3ddaa66..3cd397d60434 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5204,7 +5204,7 @@ static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev) if (t4_wait_dev_ready(adap) < 0) return PCI_ERS_RESULT_DISCONNECT; - if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL)) + if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL) < 0) return PCI_ERS_RESULT_DISCONNECT; adap->flags |= FW_OK; if (adap_init1(adap, &c))