From b6fef4c6b8c1994ffe050dcdb9391427bf0d9882 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 21 Nov 2014 19:37:09 -0800 Subject: [PATCH 01/29] ipv6: Do not treat a GSO_TCPV4 request from UDP tunnel over IPv6 as invalid This patch adds SKB_GSO_TCPV4 to the list of supported GSO types handled by the IPv6 GSO offloads. Without this change VXLAN tunnels running over IPv6 do not currently handle IPv4 TCP TSO requests correctly and end up handing the non-segmented frame off to the device. Below is the before and after for a simple netperf TCP_STREAM test between two endpoints tunneling IPv4 over a VXLAN tunnel running on IPv6 on top of a 1Gb/s network adapter. Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.29 0.88 Before 87380 16384 16384 10.03 895.69 After Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv6/ip6_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index a071563a7e6e..01e12d0d8fcc 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,7 +69,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_UDP | + ~(SKB_GSO_TCPV4 | + SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | From 4556dc591691fca743518edb24f15fbc83b5c8ef Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 21 Nov 2014 23:52:52 -0800 Subject: [PATCH 02/29] ixgbe: Correctly disable VLAN filter in promiscuous mode IXGBE adapter seems to require that VLAN filtering be enabled if VMDQ or SRIOV are enabled. When those functions are disabled, VLAN filtering may be disabled in promiscuous mode. Prior to commit a9b8943ee129 ("ixgbe: remove vlan_filter_disable and enable functions") The logic was correct. However, after the commit the logic got reversed and VLAN filtered in now turned on when VMDQ/SRIOV is disabled. This patch changes the condition to enable hw vlan filtered when VMDQ or SRIOV is enabled. Fixes: a9b8943ee129 ("ixgbe: remove vlan_filter_disable and enable functions") Cc: stable CC: Jacob Keller Signed-off-by: Vladislav Yasevich Acked-by: Emil Tantilov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d2df4e3d1032..7acde46df192 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3936,8 +3936,8 @@ void ixgbe_set_rx_mode(struct net_device *netdev) * if SR-IOV and VMDQ are disabled - otherwise ensure * that hardware VLAN filters remain enabled. */ - if (!(adapter->flags & (IXGBE_FLAG_VMDQ_ENABLED | - IXGBE_FLAG_SRIOV_ENABLED))) + if (adapter->flags & (IXGBE_FLAG_VMDQ_ENABLED | + IXGBE_FLAG_SRIOV_ENABLED)) vlnctrl |= (IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); } else { if (netdev->flags & IFF_ALLMULTI) { From b5b2ffc0574e1f271d79b6b992ee382dc9d5eaa8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Nov 2014 23:52:53 -0800 Subject: [PATCH 03/29] ixgbe: fix use after free adapter->state test in ixgbe_remove/ixgbe_probe While working on a different issue, I noticed an annoying use after free bug on my machine when unloading the ixgbe driver: [ 8642.318797] ixgbe 0000:02:00.1: removed PHC on p2p2 [ 8642.742716] ixgbe 0000:02:00.1: complete [ 8642.743784] BUG: unable to handle kernel paging request at ffff8807d3740a90 [ 8642.744828] IP: [] ixgbe_remove+0xfc/0x1b0 [ixgbe] [ 8642.745886] PGD 20c6067 PUD 81c1f6067 PMD 81c15a067 PTE 80000007d3740060 [ 8642.746956] Oops: 0002 [#1] SMP DEBUG_PAGEALLOC [ 8642.748039] Modules linked in: [...] [ 8642.752929] CPU: 1 PID: 1225 Comm: rmmod Not tainted 3.18.0-rc2+ #49 [ 8642.754203] Hardware name: Supermicro X10SLM-F/X10SLM-F, BIOS 1.1b 11/01/2013 [ 8642.755505] task: ffff8807e34d3fe0 ti: ffff8807b7204000 task.ti: ffff8807b7204000 [ 8642.756831] RIP: 0010:[] [] ixgbe_remove+0xfc/0x1b0 [ixgbe] [...] [ 8642.774335] Stack: [ 8642.775805] ffff8807ee824098 ffff8807ee824098 ffffffffa01f3000 ffff8807ee824000 [ 8642.777326] ffff8807b7207e18 ffffffff8137720f ffff8807ee824098 ffff8807ee824098 [ 8642.778848] ffffffffa01f3068 ffff8807ee8240f8 ffff8807b7207e38 ffffffff8144180f [ 8642.780365] Call Trace: [ 8642.781869] [] pci_device_remove+0x3f/0xc0 [ 8642.783395] [] __device_release_driver+0x7f/0xf0 [ 8642.784876] [] driver_detach+0xb8/0xc0 [ 8642.786352] [] bus_remove_driver+0x59/0xe0 [ 8642.787783] [] driver_unregister+0x30/0x70 [ 8642.789202] [] pci_unregister_driver+0x25/0xa0 [ 8642.790657] [] ixgbe_exit_module+0x1c/0xc8e [ixgbe] [ 8642.792064] [] SyS_delete_module+0x132/0x1c0 [ 8642.793450] [] ? do_notify_resume+0x61/0xa0 [ 8642.794837] [] system_call_fastpath+0x12/0x17 The issue is that test_and_set_bit() done on adapter->state is being performed *after* the netdevice has been freed via free_netdev(). When netdev is being allocated on initialization time, it allocates a private area, here struct ixgbe_adapter, that resides after the net_device structure. In ixgbe_probe(), the device init routine, we set up the adapter after alloc_etherdev_mq() on the private area and add a reference for the pci_dev as well via pci_set_drvdata(). Both in the error path of ixgbe_probe(), but also on module unload when ixgbe_remove() is being called, commit 41c62843eb6a ("ixgbe: Fix rcu warnings induced by LER") accesses adapter after free_netdev(). The patch stores the result in a bool and thus fixes above oops on my side. Fixes: 41c62843eb6a ("ixgbe: Fix rcu warnings induced by LER") Cc: stable Cc: Mark Rustad Signed-off-by: Daniel Borkmann Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7acde46df192..82ffe8bdb898 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7979,6 +7979,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) int i, err, pci_using_dac, expected_gts; unsigned int indices = MAX_TX_QUEUES; u8 part_str[IXGBE_PBANUM_LENGTH]; + bool disable_dev = false; #ifdef IXGBE_FCOE u16 device_caps; #endif @@ -8369,13 +8370,14 @@ err_sw_init: iounmap(adapter->io_addr); kfree(adapter->mac_table); err_ioremap: + disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_reg: err_dma: - if (!adapter || !test_and_set_bit(__IXGBE_DISABLED, &adapter->state)) + if (!adapter || disable_dev) pci_disable_device(pdev); return err; } @@ -8393,6 +8395,7 @@ static void ixgbe_remove(struct pci_dev *pdev) { struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; + bool disable_dev; ixgbe_dbg_adapter_exit(adapter); @@ -8442,11 +8445,12 @@ static void ixgbe_remove(struct pci_dev *pdev) e_dev_info("complete\n"); kfree(adapter->mac_table); + disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); - if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state)) + if (disable_dev) pci_disable_device(pdev); } From 17a402a0075c7848d940eb846f8db1da6a832c5d Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Fri, 21 Nov 2014 23:52:54 -0800 Subject: [PATCH 04/29] igb: Fixes needed for surprise removal support This patch adds some checks in order to prevent panic's on surprise removal of devices during S0, S3, S4. Without this patch, Thunderbolt type device removal will panic the system. Signed-off-by: Yanir Lubetkin Signed-off-by: Carolyn Wyborny Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a2d72a87cbde..487cd9c4ac0d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1012,7 +1012,8 @@ static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) /* igb_get_stats64() might access the rings on this vector, * we must wait a grace period before freeing it. */ - kfree_rcu(q_vector, rcu); + if (q_vector) + kfree_rcu(q_vector, rcu); } /** @@ -1792,8 +1793,10 @@ void igb_down(struct igb_adapter *adapter) adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; for (i = 0; i < adapter->num_q_vectors; i++) { - napi_synchronize(&(adapter->q_vector[i]->napi)); - napi_disable(&(adapter->q_vector[i]->napi)); + if (adapter->q_vector[i]) { + napi_synchronize(&adapter->q_vector[i]->napi); + napi_disable(&adapter->q_vector[i]->napi); + } } @@ -3717,7 +3720,8 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_tx_queues; i++) - igb_free_tx_resources(adapter->tx_ring[i]); + if (adapter->tx_ring[i]) + igb_free_tx_resources(adapter->tx_ring[i]); } void igb_unmap_and_free_tx_resource(struct igb_ring *ring, @@ -3782,7 +3786,8 @@ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_tx_queues; i++) - igb_clean_tx_ring(adapter->tx_ring[i]); + if (adapter->tx_ring[i]) + igb_clean_tx_ring(adapter->tx_ring[i]); } /** @@ -3819,7 +3824,8 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_rx_queues; i++) - igb_free_rx_resources(adapter->rx_ring[i]); + if (adapter->rx_ring[i]) + igb_free_rx_resources(adapter->rx_ring[i]); } /** @@ -3874,7 +3880,8 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_ring(adapter->rx_ring[i]); + if (adapter->rx_ring[i]) + igb_clean_rx_ring(adapter->rx_ring[i]); } /** @@ -7404,6 +7411,8 @@ static int igb_resume(struct device *dev) pci_restore_state(pdev); pci_save_state(pdev); + if (!pci_device_is_present(pdev)) + return -ENODEV; err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, From 73112f9b08ddb32c5cdaccda61bd88dfe9baf8b2 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 22 Nov 2014 15:39:17 +0100 Subject: [PATCH 05/29] solos-pci: fix error return code Return a negative error code on failure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e1,e2; @@ ( if (\(ret < 0\|ret != 0\)) { ... return ret; } | ret = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- drivers/atm/solos-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 7652e8dc188f..21b0bc6a9c96 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -1225,11 +1225,13 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) card->config_regs = pci_iomap(dev, 0, CONFIG_RAM_SIZE); if (!card->config_regs) { dev_warn(&dev->dev, "Failed to ioremap config registers\n"); + err = -ENOMEM; goto out_release_regions; } card->buffers = pci_iomap(dev, 1, DATA_RAM_SIZE); if (!card->buffers) { dev_warn(&dev->dev, "Failed to ioremap data buffers\n"); + err = -ENOMEM; goto out_unmap_config; } From 5ac6c72e594471acfa5b00210c51d533a73413ad Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 21 Oct 2014 16:12:18 +0300 Subject: [PATCH 06/29] iwlwifi: mvm: check TLV flag before trying to use hotspot firmware commands Older firmwares do not provide support for the HOT_SPOT_CMD command. Check for the appropriate TLV flag that declares hotspot support in the firmware to prevent a firmware assertion failure that can be triggered from the userspace, Cc: stable@vger.kernel.org [3.17+] Signed-off-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/iwl-fw.h | 2 ++ drivers/net/wireless/iwlwifi/mvm/mac80211.c | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-fw.h b/drivers/net/wireless/iwlwifi/iwl-fw.h index 4f6e66892acc..b894a84e8393 100644 --- a/drivers/net/wireless/iwlwifi/iwl-fw.h +++ b/drivers/net/wireless/iwlwifi/iwl-fw.h @@ -155,6 +155,7 @@ enum iwl_ucode_tlv_api { * @IWL_UCODE_TLV_CAPA_QUIET_PERIOD_SUPPORT: supports Quiet Period requests * @IWL_UCODE_TLV_CAPA_DQA_SUPPORT: supports dynamic queue allocation (DQA), * which also implies support for the scheduler configuration command + * @IWL_UCODE_TLV_CAPA_HOTSPOT_SUPPORT: supports Hot Spot Command */ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_D0I3_SUPPORT = BIT(0), @@ -163,6 +164,7 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_WFA_TPC_REP_IE_SUPPORT = BIT(10), IWL_UCODE_TLV_CAPA_QUIET_PERIOD_SUPPORT = BIT(11), IWL_UCODE_TLV_CAPA_DQA_SUPPORT = BIT(12), + IWL_UCODE_TLV_CAPA_HOTSPOT_SUPPORT = BIT(18), }; /* The default calibrate table size if not specified by firmware file */ diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index b62405865b25..b6d2683da3a9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -2448,9 +2448,15 @@ static int iwl_mvm_roc(struct ieee80211_hw *hw, switch (vif->type) { case NL80211_IFTYPE_STATION: - /* Use aux roc framework (HS20) */ - ret = iwl_mvm_send_aux_roc_cmd(mvm, channel, - vif, duration); + if (mvm->fw->ucode_capa.capa[0] & + IWL_UCODE_TLV_CAPA_HOTSPOT_SUPPORT) { + /* Use aux roc framework (HS20) */ + ret = iwl_mvm_send_aux_roc_cmd(mvm, channel, + vif, duration); + goto out_unlock; + } + IWL_ERR(mvm, "hotspot not supported\n"); + ret = -EINVAL; goto out_unlock; case NL80211_IFTYPE_P2P_DEVICE: /* handle below */ From 20ea60ca9952bd19d4b0d74719daba305aef5178 Mon Sep 17 00:00:00 2001 From: lucien Date: Sun, 23 Nov 2014 15:04:11 +0800 Subject: [PATCH 07/29] ip_tunnel: the lack of vti_link_ops' dellink() cause kernel panic Now the vti_link_ops do not point the .dellink, for fb tunnel device (ip_vti0), the net_device will be removed as the default .dellink is unregister_netdevice_queue,but the tunnel still in the tunnel list, then if we add a new vti tunnel, in ip_tunnel_find(): hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && link == t->parms.link && ==> type == t->dev->type && ip_tunnel_key_match(&t->parms, flags, key)) break; } the panic will happen, cause dev of ip_tunnel *t is null: [ 3835.072977] IP: [] ip_tunnel_find+0x9d/0xc0 [ip_tunnel] [ 3835.073008] PGD b2c21067 PUD b7277067 PMD 0 [ 3835.073008] Oops: 0000 [#1] SMP ..... [ 3835.073008] Stack: [ 3835.073008] ffff8800b72d77f0 ffffffffa0411924 ffff8800bb956000 ffff8800b72d78e0 [ 3835.073008] ffff8800b72d78a0 0000000000000000 ffffffffa040d100 ffff8800b72d7858 [ 3835.073008] ffffffffa040b2e3 0000000000000000 0000000000000000 0000000000000000 [ 3835.073008] Call Trace: [ 3835.073008] [] ip_tunnel_newlink+0x64/0x160 [ip_tunnel] [ 3835.073008] [] vti_newlink+0x43/0x70 [ip_vti] [ 3835.073008] [] rtnl_newlink+0x4fa/0x5f0 [ 3835.073008] [] ? nla_strlcpy+0x5b/0x70 [ 3835.073008] [] ? rtnl_link_ops_get+0x40/0x60 [ 3835.073008] [] ? rtnl_newlink+0x13f/0x5f0 [ 3835.073008] [] rtnetlink_rcv_msg+0xa4/0x270 [ 3835.073008] [] ? sock_has_perm+0x75/0x90 [ 3835.073008] [] ? rtnetlink_rcv+0x30/0x30 [ 3835.073008] [] netlink_rcv_skb+0xa9/0xc0 [ 3835.073008] [] rtnetlink_rcv+0x28/0x30 .... modprobe ip_vti ip link del ip_vti0 type vti ip link add ip_vti0 type vti rmmod ip_vti do that one or more times, kernel will panic. fix it by assigning ip_tunnel_dellink to vti_link_ops' dellink, in which we skip the unregister of fb tunnel device. do the same on ip6_vti. Signed-off-by: Xin Long Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 1 + net/ipv6/ip6_vti.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3e861011e4a3..1a7e979e80ba 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -528,6 +528,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .validate = vti_tunnel_validate, .newlink = vti_newlink, .changelink = vti_changelink, + .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, }; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 31089d153fd3..bcda14de7f84 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -905,6 +905,15 @@ static int vti6_newlink(struct net *src_net, struct net_device *dev, return vti6_tnl_create2(dev); } +static void vti6_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct vti6_net *ip6n = net_generic(net, vti6_net_id); + + if (dev != ip6n->fb_tnl_dev) + unregister_netdevice_queue(dev, head); +} + static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -980,6 +989,7 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .setup = vti6_dev_setup, .validate = vti6_validate, .newlink = vti6_newlink, + .dellink = vti6_dellink, .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, @@ -1020,6 +1030,7 @@ static int __net_init vti6_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); + ip6n->fb_tnl_dev->rtnl_link_ops = &vti6_link_ops; err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) From be6572fdb1bfbe23b2624d477de50af50b02f5d6 Mon Sep 17 00:00:00 2001 From: Yuri Chislov Date: Mon, 24 Nov 2014 11:25:15 +0100 Subject: [PATCH 08/29] ipv6: gre: fix wrong skb->protocol in WCCP When using GRE redirection in WCCP, it sets the wrong skb->protocol, that is, ETH_P_IP instead of ETH_P_IPV6 for the encapuslated traffic. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Cc: Dmitry Kozlov Signed-off-by: Yuri Chislov Tested-by: Yuri Chislov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4564e1fca3eb..0e32d2e1bdbf 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -502,11 +502,11 @@ static int ip6gre_rcv(struct sk_buff *skb) skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP + * - Change protocol to IPv6 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; } From 2dd34339ac6305c4f2f4e589b858212e339d31e9 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Mon, 24 Nov 2014 13:58:00 +0300 Subject: [PATCH 09/29] xen-netback: do not report success if backend_create_xenvif() fails If xenvif_alloc() or xenbus_scanf() fail in backend_create_xenvif(), xenbus is left in offline mode but netback_probe() reports success. The patch implements propagation of error code for backend_create_xenvif(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Wei Liu Signed-off-by: David S. Miller --- drivers/net/xen-netback/xenbus.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 4e56a27f9689..fab0d4b42f58 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -39,7 +39,7 @@ struct backend_info { static int connect_rings(struct backend_info *be, struct xenvif_queue *queue); static void connect(struct backend_info *be); static int read_xenbus_vif_flags(struct backend_info *be); -static void backend_create_xenvif(struct backend_info *be); +static int backend_create_xenvif(struct backend_info *be); static void unregister_hotplug_status_watch(struct backend_info *be); static void set_backend_state(struct backend_info *be, enum xenbus_state state); @@ -352,7 +352,9 @@ static int netback_probe(struct xenbus_device *dev, be->state = XenbusStateInitWait; /* This kicks hotplug scripts, so do it immediately. */ - backend_create_xenvif(be); + err = backend_create_xenvif(be); + if (err) + goto fail; return 0; @@ -397,19 +399,19 @@ static int netback_uevent(struct xenbus_device *xdev, } -static void backend_create_xenvif(struct backend_info *be) +static int backend_create_xenvif(struct backend_info *be) { int err; long handle; struct xenbus_device *dev = be->dev; if (be->vif != NULL) - return; + return 0; err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle); if (err != 1) { xenbus_dev_fatal(dev, err, "reading handle"); - return; + return (err < 0) ? err : -EINVAL; } be->vif = xenvif_alloc(&dev->dev, dev->otherend_id, handle); @@ -417,10 +419,11 @@ static void backend_create_xenvif(struct backend_info *be) err = PTR_ERR(be->vif); be->vif = NULL; xenbus_dev_fatal(dev, err, "creating interface"); - return; + return err; } kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE); + return 0; } static void backend_disconnect(struct backend_info *be) From 6e58040b848162d77243cf499d026c6253278e29 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 24 Nov 2014 13:32:16 +0200 Subject: [PATCH 10/29] af_packet: fix sparse warning af_packet produces lots of these: net/packet/af_packet.c:384:39: warning: incorrect type in return expression (different modifiers) net/packet/af_packet.c:384:39: expected struct page [pure] * net/packet/af_packet.c:384:39: got struct page * this seems to be because sparse does not realize that _pure refers to function, not the returned pointer. Tweak code slightly to avoid the warning. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 87d20f48ff06..07c04a841ba0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -378,7 +378,7 @@ static void unregister_prot_hook(struct sock *sk, bool sync) __unregister_prot_hook(sk, sync); } -static inline __pure struct page *pgv_to_page(void *addr) +static inline struct page * __pure pgv_to_page(void *addr) { if (is_vmalloc_addr(addr)) return vmalloc_to_page(addr); From 91a0b603469069cdcce4d572b7525ffc9fd352a6 Mon Sep 17 00:00:00 2001 From: Jane Zhou Date: Mon, 24 Nov 2014 11:44:08 -0800 Subject: [PATCH 11/29] net/ping: handle protocol mismatching scenario ping_lookup() may return a wrong sock if sk_buff's and sock's protocols dont' match. For example, sk_buff's protocol is ETH_P_IPV6, but sock's sk_family is AF_INET, in that case, if sk->sk_bound_dev_if is zero, a wrong sock will be returned. the fix is to "continue" the searching, if no matching, return NULL. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Jane Zhou Signed-off-by: Yiwei Zhao Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 57f7c9804139..85a02a75ad83 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -217,6 +217,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) &ipv6_hdr(skb)->daddr)) continue; #endif + } else { + continue; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) From 138a7f49270fde7547afe976a01cef2b9fbf3a0e Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Mon, 24 Nov 2014 12:02:29 -0800 Subject: [PATCH 12/29] net-timestamp: Fix a documentation typo SOF_TIMESTAMPING_OPT_ID puts the id in ee_data, not ee_info. Cc: Willem de Bruijn Signed-off-by: Andy Lutomirski Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 412f45ca2d73..1d6d02d6ba52 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -136,7 +136,7 @@ SOF_TIMESTAMPING_OPT_ID: This option is implemented only for transmit timestamps. There, the timestamp is always looped along with a struct sock_extended_err. - The option modifies field ee_info to pass an id that is unique + The option modifies field ee_data to pass an id that is unique among all possibly concurrently outstanding timestamp requests for that socket. In practice, it is a monotonically increasing u32 (that wraps). From f3750817a9034bd8cbb5ba583cd544e7cf14c7d8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 24 Nov 2014 20:08:32 -0800 Subject: [PATCH 13/29] ip6_udp_tunnel: Fix checksum calculation The UDP checksum calculation for VXLAN tunnels is currently using the socket addresses instead of the actual packet source and destination addresses. As a result the checksum calculated is incorrect in some cases. Also uh->check was being set twice, first it was set to 0, and then it is set again in udp6_set_csum. This change removes the redundant assignment to 0. Fixes: acbf74a7 ("vxlan: Refactor vxlan driver to make use of the common UDP tunnel functions.") Cc: Andy Zhou Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv6/ip6_udp_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index b04ed72c4542..8db6c98fe218 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -79,15 +79,13 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, uh->source = src_port; uh->len = htons(skb->len); - uh->check = 0; memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, - &sk->sk_v6_daddr, skb->len); + udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); From 3dc2b6a8d38cf6c7604ec25f3d50d6ec8da04435 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 24 Nov 2014 20:08:38 -0800 Subject: [PATCH 14/29] vxlan: Fix boolean flip in VXLAN_F_UDP_ZERO_CSUM6_[TX|RX] In "vxlan: Call udp_sock_create" there was a logic error that resulted in the default for IPv6 VXLAN tunnels going from using checksums to not using checksums. Since there is currently no support in iproute2 for setting these values it means that a kernel after the change cannot talk over a IPv6 VXLAN tunnel to a kernel prior the change. Fixes: 3ee64f3 ("vxlan: Call udp_sock_create") Cc: Tom Herbert Signed-off-by: Alexander Duyck Acked-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e1e335c339e3..be4649a49c5e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2306,9 +2306,9 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, if (ipv6) { udp_conf.family = AF_INET6; udp_conf.use_udp6_tx_checksums = - !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); + !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); udp_conf.use_udp6_rx_checksums = - !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); + !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = INADDR_ANY; From 43612d7c04f1a4f5e60104143918fcdf018b66ee Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Tue, 25 Nov 2014 19:54:47 +0100 Subject: [PATCH 15/29] Revert "netfilter: conntrack: fix race in __nf_conntrack_confirm against get_next_corpse" This reverts commit 5195c14c8b27cc0b18220ddbf0e5ad3328a04187. If the conntrack clashes with an existing one, it is left out of the unconfirmed list, thus, crashing when dropping the packet and releasing the conntrack since golden rule is that conntracks are always placed in any of the existing lists for traceability reasons. Reported-by: Daniel Borkmann Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=88841 Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2c699757bccf..5016a6929085 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,16 +611,12 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - - /* We have to check the DYING flag after unlink to prevent - * a race against nf_ct_get_next_corpse() possibly called from - * user context, else we insert an already 'dead' hash, blocking - * further use of that particular connection -JM. - */ - nf_ct_del_from_dying_or_unconfirmed_list(ct); + /* We have to check the DYING flag inside the lock to prevent + a race against nf_ct_get_next_corpse() possibly called from + user context, else we insert an already 'dead' hash, blocking + further use of that particular connection -JM */ if (unlikely(nf_ct_is_dying(ct))) { - nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return NF_ACCEPT; @@ -640,6 +636,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; + nf_ct_del_from_dying_or_unconfirmed_list(ct); + /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ From a91ed1901a80b401afa1b718d941d3450d868151 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 25 Nov 2014 10:32:06 -0600 Subject: [PATCH 16/29] rtlwifi: rtl8821ae: Fix 5G detection problem The changes associated with moving this driver from staging to the regular tree missed one section setting the allowable rates for the 5GHz band. This patch is needed to fix the regression reported in Bug #88811 (https://bugzilla.kernel.org/show_bug.cgi?id=88811). Reported-by: Valerio Passini Tested-by: Valerio Passini Signed-off-by: Larry Finger Cc: Valerio Passini Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/rtl8821ae/hw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c index 310d3163dc5b..8ec8200002c7 100644 --- a/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/rtlwifi/rtl8821ae/hw.c @@ -3672,8 +3672,9 @@ static void rtl8821ae_update_hal_rate_mask(struct ieee80211_hw *hw, mac->opmode == NL80211_IFTYPE_ADHOC) macid = sta->aid + 1; if (wirelessmode == WIRELESS_MODE_N_5G || - wirelessmode == WIRELESS_MODE_AC_5G) - ratr_bitmap = sta->supp_rates[NL80211_BAND_5GHZ]; + wirelessmode == WIRELESS_MODE_AC_5G || + wirelessmode == WIRELESS_MODE_A) + ratr_bitmap = sta->supp_rates[NL80211_BAND_5GHZ] << 4; else ratr_bitmap = sta->supp_rates[NL80211_BAND_2GHZ]; From 7d63a5f9b25ba6b130da8eb2d32a72b1462d0249 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 25 Nov 2014 10:32:07 -0600 Subject: [PATCH 17/29] rtlwifi: Change order in device startup The existing order of steps when starting the PCI devices works for 2.4G devices, but fails to initialize the 5G section of the RTL8821AE hardware. This patch is needed to fix the regression reported in Bug #88811 (https://bugzilla.kernel.org/show_bug.cgi?id=88811). Reported-by: Valerio Passini Tested-by: Valerio Passini Signed-off-by: Larry Finger Cc: Valerio Passini Signed-off-by: John W. Linville --- drivers/net/wireless/rtlwifi/pci.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index 61f5d36eca6a..846a2e6e34d8 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -2249,6 +2249,16 @@ int rtl_pci_probe(struct pci_dev *pdev, /*like read eeprom and so on */ rtlpriv->cfg->ops->read_eeprom_info(hw); + if (rtlpriv->cfg->ops->init_sw_vars(hw)) { + RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Can't init_sw_vars\n"); + err = -ENODEV; + goto fail3; + } + rtlpriv->cfg->ops->init_sw_leds(hw); + + /*aspm */ + rtl_pci_init_aspm(hw); + /* Init mac80211 sw */ err = rtl_init_core(hw); if (err) { @@ -2264,16 +2274,6 @@ int rtl_pci_probe(struct pci_dev *pdev, goto fail3; } - if (rtlpriv->cfg->ops->init_sw_vars(hw)) { - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, "Can't init_sw_vars\n"); - err = -ENODEV; - goto fail3; - } - rtlpriv->cfg->ops->init_sw_leds(hw); - - /*aspm */ - rtl_pci_init_aspm(hw); - err = ieee80211_register_hw(hw); if (err) { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, From c3658e8d0f10147fc86018be7f11668246c156d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2014 07:40:04 -0800 Subject: [PATCH 18/29] tcp: fix possible NULL dereference in tcp_vX_send_reset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit ca777eff51f7 ("tcp: remove dst refcount false sharing for prequeue mode") we have to relax check against skb dst in tcp_v[46]_send_reset() if prequeue dropped the dst. If a socket is provided, a full lookup was done to find this socket, so the dst test can be skipped. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88191 Reported-by: Jaša Bartelj Signed-off-by: Eric Dumazet Reported-by: Daniel Borkmann Fixes: ca777eff51f7 ("tcp: remove dst refcount false sharing for prequeue mode") Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 5 ++++- net/ipv6/tcp_ipv6.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9c7d7621466b..147be2024290 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -598,7 +598,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) if (th->rst) return; - if (skb_rtable(skb)->rt_type != RTN_LOCAL) + /* If sk not NULL, it means we did a successful lookup and incoming + * route had to be correct. prequeue might have dropped our dst. + */ + if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL) return; /* Swap the send and the receive. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ace29b60813c..dc495ae2ead0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -903,7 +903,10 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) if (th->rst) return; - if (!ipv6_unicast_destination(skb)) + /* If sk not NULL, it means we did a successful lookup and incoming + * route had to be correct. prequeue might have dropped our dst. + */ + if (!sk && !ipv6_unicast_destination(skb)) return; #ifdef CONFIG_TCP_MD5SIG From a620a6bc1c94c22d6c312892be1e0ae171523125 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 25 Nov 2014 14:21:11 -0200 Subject: [PATCH 19/29] tg3: fix ring init when there are more TX than RX channels If TX channels are set to 4 and RX channels are set to less than 4, using ethtool -L, the driver will try to initialize more RX channels than it has allocated, causing an oops. This fix only initializes the RX ring if it has been allocated. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index dbb41c1923e6..77f8f836cbbe 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8563,7 +8563,8 @@ static int tg3_init_rings(struct tg3 *tp) if (tnapi->rx_rcb) memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp)); - if (tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { + if (tnapi->prodring.rx_std && + tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { tg3_free_rings(tp); return -ENOMEM; } From a566059d89139c3c3ce21b0c7cb37886b2549b78 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 25 Nov 2014 18:08:48 -0800 Subject: [PATCH 20/29] net: dsa: bcm_sf2: fix unmapping registers in case of errors In case we fail to ioremap() one of our registers, we would be leaking existing mappings, unwind those accordingly on errors. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b9625968daac..46632e8b6336 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -404,7 +404,8 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) *base = of_iomap(dn, i); if (*base == NULL) { pr_err("unable to find register: %s\n", reg_names[i]); - return -ENODEV; + ret = -ENOMEM; + goto out_unmap; } base++; } @@ -484,7 +485,8 @@ out_free_irq0: out_unmap: base = &priv->core; for (i = 0; i < BCM_SF2_REGS_NUM; i++) { - iounmap(*base); + if (*base) + iounmap(*base); base++; } return ret; From 33f846142919f0ecceb9fde99fecdbc307dbf541 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 25 Nov 2014 18:08:49 -0800 Subject: [PATCH 21/29] net: dsa: bcm_sf2: reset switch prior to initialization Our boot agent may have left the switch in an certain configuration state, make sure we issue a software reset prior to configuring the switch in order to ensure the HW is in a consistent state, in particular transmit queues and internal buffers. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 52 ++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 46632e8b6336..4f4c2a7888e5 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -377,6 +377,29 @@ static irqreturn_t bcm_sf2_switch_1_isr(int irq, void *dev_id) return IRQ_HANDLED; } +static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv) +{ + unsigned int timeout = 1000; + u32 reg; + + reg = core_readl(priv, CORE_WATCHDOG_CTRL); + reg |= SOFTWARE_RESET | EN_CHIP_RST | EN_SW_RESET; + core_writel(priv, reg, CORE_WATCHDOG_CTRL); + + do { + reg = core_readl(priv, CORE_WATCHDOG_CTRL); + if (!(reg & SOFTWARE_RESET)) + break; + + usleep_range(1000, 2000); + } while (timeout-- > 0); + + if (timeout == 0) + return -ETIMEDOUT; + + return 0; +} + static int bcm_sf2_sw_setup(struct dsa_switch *ds) { const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; @@ -410,6 +433,12 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) base++; } + ret = bcm_sf2_sw_rst(priv); + if (ret) { + pr_err("unable to software reset switch: %d\n", ret); + goto out_unmap; + } + /* Disable all interrupts and request them */ intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); @@ -735,29 +764,6 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) return 0; } -static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 1000; - u32 reg; - - reg = core_readl(priv, CORE_WATCHDOG_CTRL); - reg |= SOFTWARE_RESET | EN_CHIP_RST | EN_SW_RESET; - core_writel(priv, reg, CORE_WATCHDOG_CTRL); - - do { - reg = core_readl(priv, CORE_WATCHDOG_CTRL); - if (!(reg & SOFTWARE_RESET)) - break; - - usleep_range(1000, 2000); - } while (timeout-- > 0); - - if (timeout == 0) - return -ETIMEDOUT; - - return 0; -} - static int bcm_sf2_sw_resume(struct dsa_switch *ds) { struct bcm_sf2_priv *priv = ds_to_priv(ds); From 2d5c57d7fbfaa642fb7f0673df24f32b83d9066c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 25 Nov 2014 11:54:31 +0200 Subject: [PATCH 22/29] net/mlx4_core: Limit count field to 24 bits in qp_alloc_res Some VF drivers use the upper byte of "param1" (the qp count field) in mlx4_qp_reserve_range() to pass flags which are used to optimize the range allocation. Under the current code, if any of these flags are set, the 32-bit count field yields a count greater than 2^24, which is out of range, and this VF fails. As these flags represent a "best-effort" allocation hint anyway, they may safely be ignored. Therefore, the PF driver may simply mask out the bits. Fixes: c82e9aa0a8 "mlx4_core: resource tracking for HCA resources used by guests" Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 5d2498dcf536..cd5cf6d957c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1546,7 +1546,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE: - count = get_param_l(&in_param); + count = get_param_l(&in_param) & 0xffffff; align = get_param_h(&in_param); err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); if (err) From 571dcfde23712b92e45a126f415d9424af0c2886 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 26 Nov 2014 10:38:06 +0800 Subject: [PATCH 23/29] stmmac: platform: fix default values of the filter bins setting The commit 3b57de958e2a brought the support for a different amount of the filter bins, but didn't update the platform driver that without CONFIG_OF. Fixes: 3b57de958e2a (net: stmmac: Support devicetree configs for mcast and ucast filter entries) Signed-off-by: Huacai Chen Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index db56fa7ce8f9..5b0da3986216 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -177,12 +177,6 @@ static int stmmac_probe_config_dt(struct platform_device *pdev, */ plat->maxmtu = JUMBO_LEN; - /* Set default value for multicast hash bins */ - plat->multicast_filter_bins = HASH_TABLE_SIZE; - - /* Set default value for unicast filter entries */ - plat->unicast_filter_entries = 1; - /* * Currently only the properties needed on SPEAr600 * are provided. All other properties should be added @@ -270,6 +264,13 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) return PTR_ERR(addr); plat_dat = dev_get_platdata(&pdev->dev); + + /* Set default value for multicast hash bins */ + plat_dat->multicast_filter_bins = HASH_TABLE_SIZE; + + /* Set default value for unicast filter entries */ + plat_dat->unicast_filter_entries = 1; + if (pdev->dev.of_node) { if (!plat_dat) plat_dat = devm_kzalloc(&pdev->dev, From 6e8d1c55454574a22b3e8e263b1a12888909b033 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 26 Nov 2014 13:42:16 +0100 Subject: [PATCH 24/29] bridge: Validate IFLA_BRIDGE_FLAGS attribute length Payload is currently accessed blindly and may exceed valid message boundaries. Fixes: 407af3299 ("bridge: Add netlink interface to configure vlans on bridge ports") Cc: Vlad Yasevich Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a6882686ca3a..5a853f8fad6c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2798,6 +2798,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; + have_flags = true; flags = nla_get_u16(attr); break; @@ -2868,6 +2871,9 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; + have_flags = true; flags = nla_get_u16(attr); break; From b7c1a314112785c319b2ba2dc8e73497714e42a1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 26 Nov 2014 13:42:17 +0100 Subject: [PATCH 25/29] net: Validate IFLA_BRIDGE_MODE attribute length Payload is currently accessed blindly and may exceed valid message boundaries. Fixes: a77dcb8c8 ("be2net: set and query VEB/VEPA mode of the PF interface") Fixes: 815cccbf1 ("ixgbe: add setlink, getlink support to ixgbe and ixgbevf") Cc: Ajit Khaparde Cc: John Fastabend Signed-off-by: Thomas Graf Acked-by: Jeff Kirsher Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 3 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 3e8475cae4f9..337e4cd70a13 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4314,6 +4314,9 @@ static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh) if (nla_type(attr) != IFLA_BRIDGE_MODE) continue; + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + mode = nla_get_u16(attr); if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 82ffe8bdb898..dff9905331ba 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7677,6 +7677,9 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, if (nla_type(attr) != IFLA_BRIDGE_MODE) continue; + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + mode = nla_get_u16(attr); if (mode == BRIDGE_MODE_VEPA) { reg = 0; From 4ea85e831e290cd967d161c66d0a3cf8be39f1f6 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 26 Nov 2014 13:42:18 +0100 Subject: [PATCH 26/29] net: Check for presence of IFLA_AF_SPEC ndo_bridge_setlink() is currently only called on the slave if IFLA_AF_SPEC is set but this is a very fragile assumption and may change in the future. Cc: Ajit Khaparde Cc: John Fastabend Signed-off-by: Thomas Graf Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 2 ++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 337e4cd70a13..597c463e384d 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4309,6 +4309,8 @@ static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh) return -EOPNOTSUPP; br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) != IFLA_BRIDGE_MODE) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index dff9905331ba..cc51554c9e99 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7669,6 +7669,8 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev, return -EOPNOTSUPP; br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; From 6f705d8cfc0af3607aed890c53d86255e315c6f4 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 26 Nov 2014 13:42:19 +0100 Subject: [PATCH 27/29] bridge: Add missing policy entry for IFLA_BRPORT_FAST_LEAVE Fixes: c2d3babf ("bridge: implement multicast fast leave") Cc: David S. Miller Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 2ff9706647f2..e5ec470b851f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -280,6 +280,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, }; From aa68c20ff32f9a6fb3ca7f93ed9beae01899d00d Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 26 Nov 2014 13:42:20 +0100 Subject: [PATCH 28/29] bridge: Sanitize IFLA_EXT_MASK for AF_BRIDGE:RTM_GETLINK Only search for IFLA_EXT_MASK if the message actually carries a ifinfomsg header and validate minimal length requirements for IFLA_EXT_MASK. Fixes: 6cbdceeb ("bridge: Dump vlan information from a bridge port") Cc: Vlad Yasevich Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5a853f8fad6c..b9b7dfaf202b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2685,13 +2685,20 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = cb->nlh->nlmsg_seq; - struct nlattr *extfilt; u32 filter_mask = 0; - extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), - IFLA_EXT_MASK); - if (extfilt) - filter_mask = nla_get_u32(extfilt); + if (nlmsg_len(cb->nlh) > sizeof(struct ifinfomsg)) { + struct nlattr *extfilt; + + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), + IFLA_EXT_MASK); + if (extfilt) { + if (nla_len(extfilt) < sizeof(filter_mask)) + return -EINVAL; + + filter_mask = nla_get_u32(extfilt); + } + } rcu_read_lock(); for_each_netdev_rcu(net, dev) { From f4713a3dfad045d46afcb9c2a7d0bba288920ed4 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 26 Nov 2014 14:53:02 -0500 Subject: [PATCH 29/29] net-timestamp: make tcp_recvmsg call ipv6_recv_error for AF_INET6 socks TCP timestamping introduced MSG_ERRQUEUE handling for TCP sockets. If the socket is of family AF_INET6, call ipv6_recv_error instead of ip_recv_error. This change is more complex than a single branch due to the loadable ipv6 module. It reuses a pre-existing indirect function call from ping. The ping code is safe to call, because it is part of the core ipv6 module and always present when AF_INET6 sockets are active. Fixes: 4ed2d765 (net-timestamp: TCP timestamping) Signed-off-by: Willem de Bruijn ---- It may also be worthwhile to add WARN_ON_ONCE(sk->family == AF_INET6) to ip_recv_error. Signed-off-by: David S. Miller --- include/net/inet_common.h | 2 ++ net/ipv4/af_inet.c | 11 +++++++++++ net/ipv4/ping.c | 12 ++---------- net/ipv4/tcp.c | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fe7994c48b75..b2828a06a5a6 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -37,6 +37,8 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net); +int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); static inline void inet_ctl_sock_destroy(struct sock *sk) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8b7fe5b03906..e67da4e6c324 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1386,6 +1386,17 @@ out: return pp; } +int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +{ + if (sk->sk_family == AF_INET) + return ip_recv_error(sk, msg, len, addr_len); +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); +#endif + return -EINVAL; +} + static int inet_gro_complete(struct sk_buff *skb, int nhoff) { __be16 newlen = htons(skb->len - nhoff); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 85a02a75ad83..5d740cccf69e 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -855,16 +855,8 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (flags & MSG_ERRQUEUE) { - if (family == AF_INET) { - return ip_recv_error(sk, msg, len, addr_len); -#if IS_ENABLED(CONFIG_IPV6) - } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len, - addr_len); -#endif - } - } + if (flags & MSG_ERRQUEUE) + return inet_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 39ec0c379545..38c2bcb8dd5d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1598,7 +1598,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, u32 urg_hole = 0; if (unlikely(flags & MSG_ERRQUEUE)) - return ip_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len, addr_len); if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED))