From c2d0f48a13e53b4747704c9e692f5e765e52041a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 30 Nov 2016 21:47:09 +0100 Subject: [PATCH 01/34] batman-adv: Check for alloc errors when preparing TT local data batadv_tt_prepare_tvlv_local_data can fail to allocate the memory for the new TVLV block. The caller is informed about this problem with the returned length of 0. Not checking this value results in an invalid memory access when either tt_data or tt_change is accessed. Reported-by: Dan Carpenter Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7f663092f6de..0dc85eb1cb7a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3282,7 +3282,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto unlock; /* Copy the last orig_node's OGM buffer */ @@ -3300,7 +3300,7 @@ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, &tvlv_tt_data, &tt_change, &tt_len); - if (!tt_len) + if (!tt_len || !tvlv_len) goto out; /* fill the rest of the tvlv with the real TT entries */ From c66ebf2db555c6ed705044eabd2b37dcd546f68b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 21:49:08 +0800 Subject: [PATCH 02/34] net: dcb: set error code on failures In function dcbnl_cee_fill(), returns the value of variable err on errors. However, on some error paths (e.g. nla put fails), its value may be 0. It may be better to explicitly set a negative errno to variable err before returning. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188881 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 4f6c1862dfd2..3202d75329b5 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1353,6 +1353,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) dcb_unlock: spin_unlock_bh(&dcb_lock); nla_put_failure: + err = -EMSGSIZE; return err; } From 1a239173cccff726b60ac6a9c79ae4a1e26cfa49 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 1 Dec 2016 07:27:52 -0500 Subject: [PATCH 03/34] ipv4: Drop leaf from suffix pull/push functions It wasn't necessary to pass a leaf in when doing the suffix updates so just drop it. Instead just pass the suffix and work with that. Since we dropped the leaf there is no need to include that in the name so the names are updated to node_push_suffix and node_pull_suffix. Finally I noticed that the logic for pulling the suffix length back actually had some issues. Specifically it would stop prematurely if there was a longer suffix, but it was not as long as the original suffix. I updated the code to address that in node_pull_suffix. Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length") Suggested-by: Robert Shearman Signed-off-by: Alexander Duyck Reviewed-by: Robert Shearman Tested-by: Robert Shearman Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 026f309c51e9..eec90d72dd52 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -930,22 +930,24 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn) return tp; } -static void leaf_pull_suffix(struct key_vector *tp, struct key_vector *l) +static void node_pull_suffix(struct key_vector *tn, unsigned char slen) { - while ((tp->slen > tp->pos) && (tp->slen > l->slen)) { - if (update_suffix(tp) > l->slen) + unsigned char node_slen = tn->slen; + + while ((node_slen > tn->pos) && (node_slen > slen)) { + slen = update_suffix(tn); + if (node_slen == slen) break; - tp = node_parent(tp); + + tn = node_parent(tn); + node_slen = tn->slen; } } -static void leaf_push_suffix(struct key_vector *tn, struct key_vector *l) +static void node_push_suffix(struct key_vector *tn, unsigned char slen) { - /* if this is a new leaf then tn will be NULL and we can sort - * out parent suffix lengths as a part of trie_rebalance - */ - while (tn->slen < l->slen) { - tn->slen = l->slen; + while (tn->slen < slen) { + tn->slen = slen; tn = node_parent(tn); } } @@ -1107,7 +1109,7 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, /* if we added to the tail node then we need to update slen */ if (l->slen < new->fa_slen) { l->slen = new->fa_slen; - leaf_push_suffix(tp, l); + node_push_suffix(tp, new->fa_slen); } return 0; @@ -1511,7 +1513,7 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, /* update the trie with the latest suffix length */ l->slen = fa->fa_slen; - leaf_pull_suffix(tp, l); + node_pull_suffix(tp, fa->fa_slen); } /* Caller must hold RTNL. */ From a52ca62c4a6771028da9c1de934cdbcd93d54bb4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 1 Dec 2016 07:27:57 -0500 Subject: [PATCH 04/34] ipv4: Drop suffix update from resize code It has been reported that update_suffix can be expensive when it is called on a large node in which most of the suffix lengths are the same. The time required to add 200K entries had increased from around 3 seconds to almost 49 seconds. In order to address this we need to move the code for updating the suffix out of resize and instead just have it handled in the cases where we are pushing a node that increases the suffix length, or will decrease the suffix length. Fixes: 5405afd1a306 ("fib_trie: Add tracking value for suffix length") Reported-by: Robert Shearman Signed-off-by: Alexander Duyck Reviewed-by: Robert Shearman Tested-by: Robert Shearman Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index eec90d72dd52..e3665bf7a7f3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -719,6 +719,13 @@ static unsigned char update_suffix(struct key_vector *tn) { unsigned char slen = tn->pos; unsigned long stride, i; + unsigned char slen_max; + + /* only vector 0 can have a suffix length greater than or equal to + * tn->pos + tn->bits, the second highest node will have a suffix + * length at most of tn->pos + tn->bits - 1 + */ + slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen); /* search though the list of children looking for nodes that might * have a suffix greater than the one we currently have. This is @@ -736,12 +743,8 @@ static unsigned char update_suffix(struct key_vector *tn) slen = n->slen; i &= ~(stride - 1); - /* if slen covers all but the last bit we can stop here - * there will be nothing longer than that since only node - * 0 and 1 << (bits - 1) could have that as their suffix - * length. - */ - if ((slen + 1) >= (tn->pos + tn->bits)) + /* stop searching if we have hit the maximum possible value */ + if (slen >= slen_max) break; } @@ -913,21 +916,7 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn) return collapse(t, tn); /* update parent in case halve failed */ - tp = node_parent(tn); - - /* Return if at least one deflate was run */ - if (max_work != MAX_WORK) - return tp; - - /* push the suffix length to the parent node */ - if (tn->slen > tn->pos) { - unsigned char slen = update_suffix(tn); - - if (slen > tp->slen) - tp->slen = slen; - } - - return tp; + return node_parent(tn); } static void node_pull_suffix(struct key_vector *tn, unsigned char slen) @@ -1068,6 +1057,7 @@ static int fib_insert_node(struct trie *t, struct key_vector *tp, } /* Case 3: n is NULL, and will just insert a new leaf */ + node_push_suffix(tp, new->fa_slen); NODE_INIT_PARENT(l, tp); put_child_root(tp, key, l); trie_rebalance(t, tp); @@ -1501,6 +1491,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, * out parent suffix lengths as a part of trie_rebalance */ if (hlist_empty(&l->leaf)) { + if (tp->slen == l->slen) + node_pull_suffix(tp, tp->pos); put_child_root(tp, l->key, NULL); node_free(l); trie_rebalance(t, tp); @@ -1785,6 +1777,10 @@ void fib_table_flush_external(struct fib_table *tb) if (IS_TRIE(pn)) break; + /* update the suffix to address pulled leaves */ + if (pn->slen > pn->pos) + update_suffix(pn); + /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); @@ -1851,6 +1847,10 @@ int fib_table_flush(struct net *net, struct fib_table *tb) if (IS_TRIE(pn)) break; + /* update the suffix to address pulled leaves */ + if (pn->slen > pn->pos) + update_suffix(pn); + /* resize completed node */ pn = resize(t, pn); cindex = get_index(pkey, pn); From 89aa8445cd4e8c2556c40d42dd0ceb2cbb96ba78 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 17:56:17 +0800 Subject: [PATCH 05/34] netdev: broadcom: propagate error code Function bnxt_hwrm_stat_ctx_alloc() always returns 0, even if the call to _hwrm_send_message() fails. It may be better to propagate the errors to the caller of bnxt_hwrm_stat_ctx_alloc(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188661 Signed-off-by: Pan Bian Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ee1a803aa11a..f08a20b921e7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4120,7 +4120,7 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp) bp->grp_info[i].fw_stats_ctx = cpr->hw_stats_ctx_id; } mutex_unlock(&bp->hwrm_cmd_lock); - return 0; + return rc; } static int bnxt_hwrm_func_qcfg(struct bnxt *bp) From 14dd3e1b970feb125e4f453bc3b0569db5b2069b Mon Sep 17 00:00:00 2001 From: Suraj Deshmukh Date: Sat, 3 Dec 2016 07:59:26 +0000 Subject: [PATCH 06/34] net: af_mpls.c add space before open parenthesis Adding space after switch keyword before open parenthesis for readability purpose. This patch fixes the checkpatch.pl warning: space required before the open parenthesis '(' Signed-off-by: Suraj Deshmukh Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 0e4334cbde17..15fe97644ffe 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1252,7 +1252,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (!nla) continue; - switch(index) { + switch (index) { case RTA_OIF: cfg->rc_ifindex = nla_get_u32(nla); break; From b59589635ff01cc25270360709eeeb5c45c6abb9 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 19:33:23 +0800 Subject: [PATCH 07/34] net: bridge: set error code on failure Function br_sysfs_addbr() does not set error code when the call kobject_create_and_add() returns a NULL pointer. It may be better to return "-ENOMEM" when kobject_create_and_add() fails. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188781 Signed-off-by: Pan Bian Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index e120307c6e36..f88c4df3f91e 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -898,6 +898,7 @@ int br_sysfs_addbr(struct net_device *dev) if (!br->ifobj) { pr_info("%s: can't add kobject (directory) %s/%s\n", __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); + err = -ENOMEM; goto out3; } return 0; From 51920830d9d0eb617af18dc60443fcd4fb50a533 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 19:24:48 +0800 Subject: [PATCH 08/34] net: usb: set error code when usb_alloc_urb fails In function lan78xx_probe(), variable ret takes the errno code on failures. However, when the call to usb_alloc_urb() fails, its value will keeps 0. 0 indicates success in the context, which is inconsistent with the execution result. This patch fixes the bug, assigning "-ENOMEM" to ret when usb_alloc_urb() returns a NULL pointer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188771 Signed-off-by: Pan Bian Acked-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index db558b8b32fe..f33460cec79f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3395,6 +3395,7 @@ static int lan78xx_probe(struct usb_interface *intf, if (buf) { dev->urb_intr = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb_intr) { + ret = -ENOMEM; kfree(buf); goto out3; } else { From 4606c9e8c541f97034e53e644129376a6170b8c7 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sat, 3 Dec 2016 20:25:45 +0800 Subject: [PATCH 09/34] atm: lanai: set error code when ioremap fails In function lanai_dev_open(), when the call to ioremap() fails, the value of return variable result is 0. 0 means no error in this context. This patch fixes the bug, assigning "-ENOMEM" to result when ioremap() returns a NULL pointer. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188791 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/atm/lanai.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index ce43ae3e87b3..445505d9ea07 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2143,6 +2143,7 @@ static int lanai_dev_open(struct atm_dev *atmdev) lanai->base = (bus_addr_t) ioremap(raw_base, LANAI_MAPPING_SIZE); if (lanai->base == NULL) { printk(KERN_ERR DEV_LABEL ": couldn't remap I/O space\n"); + result = -ENOMEM; goto error_pci; } /* 3.3: Reset lanai and PHY */ From 0eab121ef8750a5c8637d51534d5e9143fb0633f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Dec 2016 10:34:38 -0800 Subject: [PATCH 10/34] net: ping: check minimum size on ICMP header length Prior to commit c0371da6047a ("put iov_iter into msghdr") in v3.19, there was no check that the iovec contained enough bytes for an ICMP header, and the read loop would walk across neighboring stack contents. Since the iov_iter conversion, bad arguments are noticed, but the returned error is EFAULT. Returning EINVAL is a clearer error and also solves the problem prior to v3.19. This was found using trinity with KASAN on v3.18: BUG: KASAN: stack-out-of-bounds in memcpy_fromiovec+0x60/0x114 at addr ffffffc071077da0 Read of size 8 by task trinity-c2/9623 page:ffffffbe034b9a08 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x0() page dumped because: kasan: bad access detected CPU: 0 PID: 9623 Comm: trinity-c2 Tainted: G BU 3.18.0-dirty #15 Hardware name: Google Tegra210 Smaug Rev 1,3+ (DT) Call trace: [] dump_backtrace+0x0/0x1ac arch/arm64/kernel/traps.c:90 [] show_stack+0x10/0x1c arch/arm64/kernel/traps.c:171 [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0x7c/0xd0 lib/dump_stack.c:50 [< inline >] print_address_description mm/kasan/report.c:147 [< inline >] kasan_report_error mm/kasan/report.c:236 [] kasan_report+0x380/0x4b8 mm/kasan/report.c:259 [< inline >] check_memory_region mm/kasan/kasan.c:264 [] __asan_load8+0x20/0x70 mm/kasan/kasan.c:507 [] memcpy_fromiovec+0x5c/0x114 lib/iovec.c:15 [< inline >] memcpy_from_msg include/linux/skbuff.h:2667 [] ping_common_sendmsg+0x50/0x108 net/ipv4/ping.c:674 [] ping_v4_sendmsg+0xd8/0x698 net/ipv4/ping.c:714 [] inet_sendmsg+0xe0/0x12c net/ipv4/af_inet.c:749 [< inline >] __sock_sendmsg_nosec net/socket.c:624 [< inline >] __sock_sendmsg net/socket.c:632 [] sock_sendmsg+0x124/0x164 net/socket.c:643 [< inline >] SYSC_sendto net/socket.c:1797 [] SyS_sendto+0x178/0x1d8 net/socket.c:1761 CVE-2016-8399 Reported-by: Qidan He Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- net/ipv4/ping.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 205e2000d395..96b8e2b95731 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -657,6 +657,10 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, if (len > 0xFFFF) return -EMSGSIZE; + /* Must have at least a full ICMP header. */ + if (len < icmph_len) + return -EINVAL; + /* * Check the flags. */ From c79e167c3cba066892542f3dfb5e73b7207e01df Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 12:15:44 +0800 Subject: [PATCH 11/34] net: caif: remove ineffective check The check of the return value of sock_register() is ineffective. "if(!err)" seems to be a typo. It is better to propagate the error code to the callers of caif_sktinit_module(). This patch removes the check statment and directly returns the result of sock_register(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188751 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- net/caif/caif_socket.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index aa209b1066c9..92cbbd2afddb 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1107,10 +1107,7 @@ static struct net_proto_family caif_family_ops = { static int __init caif_sktinit_module(void) { - int err = sock_register(&caif_family_ops); - if (!err) - return err; - return 0; + return sock_register(&caif_family_ops); } static void __exit caif_sktexit_module(void) From 8ad3ba934587c8ecbfee13331d859a7849afdfbb Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 13:27:40 +0800 Subject: [PATCH 12/34] net: irda: set error code on failures When the calls to kzalloc() fail, the value of return variable ret may be 0. 0 means success in this context. This patch fixes the bug, assigning "-ENOMEM" to ret before calling kzalloc(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188971 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/net/irda/irda-usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index a198946bc54f..8716b8c07feb 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -1723,6 +1723,7 @@ static int irda_usb_probe(struct usb_interface *intf, /* Don't change this buffer size and allocation without doing * some heavy and complete testing. Don't ask why :-( * Jean II */ + ret = -ENOMEM; self->speed_buff = kzalloc(IRDA_USB_SPEED_MTU, GFP_KERNEL); if (!self->speed_buff) goto err_out_3; From 7cf6156633b71743c09a8e56b1f0dedfc4ce6e66 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 13:45:15 +0800 Subject: [PATCH 13/34] atm: fix improper return value It returns variable "error" when ioremap_nocache() returns a NULL pointer. The value of "error" is 0 then, which will mislead the callers to believe that there is no error. This patch fixes the bug, returning "-ENOMEM". Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189021 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/atm/eni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index f2aaf9e32a36..40c2d561417b 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1727,7 +1727,7 @@ static int eni_do_init(struct atm_dev *dev) printk("\n"); printk(KERN_ERR DEV_LABEL "(itf %d): can't set up page " "mapping\n",dev->number); - return error; + return -ENOMEM; } eni_dev->ioaddr = base; eni_dev->base_diff = real_base - (unsigned long) base; From 0ff18d2d36efad65572990fa7febeb3ebe19da89 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 13:53:53 +0800 Subject: [PATCH 14/34] net: ethernet: qlogic: set error code on failure When calling dma_mapping_error(), the value of return variable rc is 0. And when the call returns an unexpected value, rc is not set to a negative errno. Thus, it will return 0 on the error path, and its callers cannot detect the bug. This patch fixes the bug, assigning "-ENOMEM" to err. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189041 Signed-off-by: Pan Bian Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index f95385cbbd40..62ae55bd81b8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -1730,6 +1730,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) mapping))) { DP_NOTICE(cdev, "Unable to map frag - dropping packet\n"); + rc = -ENOMEM; goto err; } } else { From 005f7e68e74df94c2a676b5a3e98c6fb65aae606 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 18:46:03 +0800 Subject: [PATCH 15/34] net: bnx2x: fix improper return value Macro BNX2X_ALLOC_AND_SET(arr, lbl, func) calls kmalloc() to allocate memory, and jumps to label "lbl" if the allocation fails. Label "lbl" first cleans memory and then returns variable rc. Before calling the macro, the value of variable rc is 0. Because 0 means no error, the callers of bnx2x_init_firmware() may be misled. This patch fixes the bug, assigning "-ENOMEM" to rc before calling macro NX2X_ALLOC_AND_SET(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=189141 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 0cee4c0283f9..6f9fc20f0512 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13505,6 +13505,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp) /* Initialize the pointers to the init arrays */ /* Blob */ + rc = -ENOMEM; BNX2X_ALLOC_AND_SET(init_data, request_firmware_exit, be32_to_cpu_n); /* Opcodes */ From 9a53682b340b97642793271ba095cc9531a7b649 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 4 Dec 2016 18:43:31 +0800 Subject: [PATCH 16/34] isdn: hisax: set error code on failure In function hfc4s8s_probe(), the value of return variable err should be negative on failures. However, when the call to request_region() returns NULL, the value of err is 0. This patch fixes the bug, assigning "-EBUSY" to err on the path that request_region() fails. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188931 Signed-off-by: Pan Bian Signed-off-by: David S. Miller --- drivers/isdn/hisax/hfc4s8s_l1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c index 9600cd771f1a..e034ed847ff3 100644 --- a/drivers/isdn/hisax/hfc4s8s_l1.c +++ b/drivers/isdn/hisax/hfc4s8s_l1.c @@ -1499,6 +1499,7 @@ hfc4s8s_probe(struct pci_dev *pdev, const struct pci_device_id *ent) printk(KERN_INFO "HFC-4S/8S: failed to request address space at 0x%04x\n", hw->iobase); + err = -EBUSY; goto out; } From 65870fa77fd7f83d7be4ed924d47ed9e3831f434 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 4 Dec 2016 15:30:17 +0200 Subject: [PATCH 17/34] bnx2x: Correct ringparam estimate when DOWN Until interface is up [and assuming ringparams weren't explicitly configured] when queried for the size of its rings bnx2x would claim they're the maximal size by default. That is incorrect as by default the maximal number of buffers would be equally divided between the various rx rings. This prevents the user from actually setting the number of elements on each rx ring to be of maximal size prior to transitioning the interface into up state. To fix this, make a rough estimation about the number of buffers. It wouldn't always be accurate, but it would be much better than current estimation and would allow users to increase number of buffers during early initialization of the interface. Reported-by: Seymour, Shane Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 85a7800bfc12..5f19427c7b27 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1872,8 +1872,16 @@ static void bnx2x_get_ringparam(struct net_device *dev, ering->rx_max_pending = MAX_RX_AVAIL; + /* If size isn't already set, we give an estimation of the number + * of buffers we'll have. We're neglecting some possible conditions + * [we couldn't know for certain at this point if number of queues + * might shrink] but the number would be correct for the likely + * scenario. + */ if (bp->rx_ring_size) ering->rx_pending = bp->rx_ring_size; + else if (BNX2X_NUM_RX_QUEUES(bp)) + ering->rx_pending = MAX_RX_AVAIL / BNX2X_NUM_RX_QUEUES(bp); else ering->rx_pending = MAX_RX_AVAIL; From 360d9df2acd9f0b89aabaf16fca08954f113bd4e Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sun, 4 Dec 2016 15:30:18 +0200 Subject: [PATCH 18/34] bnx2x: Prevent tunnel config for 577xx Only the 578xx adapters are capable of configuring UDP ports for the purpose of tunnelling - doing the same on 577xx might lead to a firmware assertion. We're already not claiming support for any related feature for such devices, but we also need to prevent the configuration of the UDP ports to the device in this case. Fixes: f34fa14cc033 ("bnx2x: Add vxlan RSS support") Reported-by: Anikina Anna Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 6f9fc20f0512..4febe60eadc2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10138,7 +10138,7 @@ static void __bnx2x_add_udp_port(struct bnx2x *bp, u16 port, { struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type]; - if (!netif_running(bp->dev) || !IS_PF(bp)) + if (!netif_running(bp->dev) || !IS_PF(bp) || CHIP_IS_E1x(bp)) return; if (udp_port->count && udp_port->dst_port == port) { @@ -10163,7 +10163,7 @@ static void __bnx2x_del_udp_port(struct bnx2x *bp, u16 port, { struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type]; - if (!IS_PF(bp)) + if (!IS_PF(bp) || CHIP_IS_E1x(bp)) return; if (!udp_port->count || udp_port->dst_port != port) { From c823abac17926767fb50175e098f087a6ac684c3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 4 Dec 2016 19:22:05 -0800 Subject: [PATCH 19/34] net: ep93xx_eth: Do not crash unloading module When we unload the ep93xx_eth, whether we have opened the network interface or not, we will either hit a kernel paging request error, or a simple NULL pointer de-reference because: - if ep93xx_open has been called, we have created a valid DMA mapping for ep->descs, when we call ep93xx_stop, we also call ep93xx_free_buffers, ep->descs now has a stale value - if ep93xx_open has not been called, we have a NULL pointer for ep->descs, so performing any operation against that address just won't work Fix this by adding a NULL pointer check for ep->descs which means that ep93xx_free_buffers() was able to successfully tear down the descriptors and free the DMA cookie as well. Fixes: 1d22e05df818 ("[PATCH] Cirrus Logic ep93xx ethernet driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/cirrus/ep93xx_eth.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index de9f7c97d916..9a161e981529 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -468,6 +468,9 @@ static void ep93xx_free_buffers(struct ep93xx_priv *ep) struct device *dev = ep->dev->dev.parent; int i; + if (!ep->descs) + return; + for (i = 0; i < RX_QUEUE_ENTRIES; i++) { dma_addr_t d; @@ -490,6 +493,7 @@ static void ep93xx_free_buffers(struct ep93xx_priv *ep) dma_free_coherent(dev, sizeof(struct ep93xx_descs), ep->descs, ep->descs_dma_addr); + ep->descs = NULL; } static int ep93xx_alloc_buffers(struct ep93xx_priv *ep) From ad558858295726cb876b78d1c39d471372f1901a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 2 Dec 2016 14:53:59 -0800 Subject: [PATCH 20/34] uapi: export tc_skbmod.h Fixes commit 735cffe5d800 ("net_sched: Introduce skbmod action") Not used by iproute2 but maybe in future. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 9611c7b6c18f..e3db7403296f 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -12,3 +12,4 @@ header-y += tc_bpf.h header-y += tc_connmark.h header-y += tc_ife.h header-y += tc_tunnel_key.h +header-y += tc_skbmod.h From ffe3bb85c19e1dbf96cc13aad823ae0a8855d066 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 2 Dec 2016 14:54:00 -0800 Subject: [PATCH 21/34] uapi: export nf_log.h File is in uapi directory but not being copied on make install_headers Fixes commit 4ec9c8fbbc22 ("netfilter: nft_log: complete NFTA_LOG_FLAGS attr support"). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index cd26d7a0fd07..03f194aeadc5 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h header-y += nf_conntrack_sctp.h header-y += nf_conntrack_tcp.h header-y += nf_conntrack_tuple_common.h +header-y += nf_log.h header-y += nf_tables.h header-y += nf_tables_compat.h header-y += nf_nat.h From ed5d7788a934a4b6d6d025e948ed4da496b4f12e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 5 Dec 2016 15:28:21 +0800 Subject: [PATCH 22/34] netlink: Do not schedule work from sk_destruct It is wrong to schedule a work from sk_destruct using the socket as the memory reserve because the socket will be freed immediately after the return from sk_destruct. Instead we should do the deferral prior to sk_free. This patch does just that. Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread") Signed-off-by: Herbert Xu Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 602e5ebe9db3..246f29d365c0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -322,11 +322,13 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -static void __netlink_sock_destruct(struct sock *sk) +static void netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); module_put(nlk->cb.module); kfree_skb(nlk->cb.skb); } @@ -348,21 +350,7 @@ static void netlink_sock_destruct_work(struct work_struct *work) struct netlink_sock *nlk = container_of(work, struct netlink_sock, work); - nlk->cb.done(&nlk->cb); - __netlink_sock_destruct(&nlk->sk); -} - -static void netlink_sock_destruct(struct sock *sk) -{ - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running && nlk->cb.done) { - INIT_WORK(&nlk->work, netlink_sock_destruct_work); - schedule_work(&nlk->work); - return; - } - - __netlink_sock_destruct(sk); + sk_free(&nlk->sk); } /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on @@ -667,8 +655,18 @@ out_module: static void deferred_put_nlk_sk(struct rcu_head *head) { struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); + struct sock *sk = &nlk->sk; - sock_put(&nlk->sk); + if (!atomic_dec_and_test(&sk->sk_refcnt)) + return; + + if (nlk->cb_running && nlk->cb.done) { + INIT_WORK(&nlk->work, netlink_sock_destruct_work); + schedule_work(&nlk->work); + return; + } + + sk_free(sk); } static int netlink_release(struct socket *sock) From 6b3374cb1c0bd4699ace03d7e0dc14b532e4f52e Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 5 Dec 2016 18:12:54 +0100 Subject: [PATCH 23/34] net: stmmac: clear reset value of snps, wr_osr_lmt/snps, rd_osr_lmt before writing WR_OSR_LMT and RD_OSR_LMT have a reset value of 1. Since the reset value wasn't cleared before writing, the value in the register would be incorrect if specifying an uneven value for snps,wr_osr_lmt/snps,rd_osr_lmt. Zero is a valid value for the properties, since the databook specifies: maximum outstanding requests = WR_OSR_LMT + 1. We do not want to change the behavior for existing users when the property is missing. Therefore, default to 1 if the property is missing, since that is the same as the reset value. Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 6 ++++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 990746955216..f35385266fbf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -43,9 +43,11 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) if (axi->axi_xit_frm) value |= DMA_AXI_LPI_XIT_FRM; + value &= ~DMA_AXI_WR_OSR_LMT; value |= (axi->axi_wr_osr_lmt & DMA_AXI_WR_OSR_LMT_MASK) << DMA_AXI_WR_OSR_LMT_SHIFT; + value &= ~DMA_AXI_RD_OSR_LMT; value |= (axi->axi_rd_osr_lmt & DMA_AXI_RD_OSR_LMT_MASK) << DMA_AXI_RD_OSR_LMT_SHIFT; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 116151cd6a95..32bc2fc73cdc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -30,9 +30,11 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) if (axi->axi_xit_frm) value |= DMA_AXI_LPI_XIT_FRM; + value &= ~DMA_AXI_WR_OSR_LMT; value |= (axi->axi_wr_osr_lmt & DMA_AXI_OSR_MAX) << DMA_AXI_WR_OSR_LMT_SHIFT; + value &= ~DMA_AXI_RD_OSR_LMT; value |= (axi->axi_rd_osr_lmt & DMA_AXI_OSR_MAX) << DMA_AXI_RD_OSR_LMT_SHIFT; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index a840818bf4df..ac3d39c69509 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -126,8 +126,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); - of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt); - of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt); + if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) + axi->axi_wr_osr_lmt = 1; + if (of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt)) + axi->axi_rd_osr_lmt = 1; of_property_read_u32_array(np, "snps,blen", axi->axi_blen, AXI_BLEN); of_node_put(np); From efc45154828ae4e49c6b46f59882bfef32697d44 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 3 Dec 2016 17:31:45 +0100 Subject: [PATCH 24/34] uapi glibc compat: fix outer guard of net device flags enum Fix a wrong condition preventing the higher net device flags IFF_LOWER_UP etc to be defined if net/if.h is included before linux/if.h. The comment makes it clear the intention was to allow partial definition with either parts. This fixes compilation of userspace programs trying to use IFF_LOWER_UP, IFF_DORMANT or IFF_ECHO. Fixes: 4a91cb61bb99 ("uapi glibc compat: fix compile errors when glibc net/if.h included before linux/if.h") Signed-off-by: Jonas Gorski Reviewed-by: Mikko Rapeli Signed-off-by: David S. Miller --- include/uapi/linux/if.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h index e601c8c3bdc7..1158a043342a 100644 --- a/include/uapi/linux/if.h +++ b/include/uapi/linux/if.h @@ -31,7 +31,7 @@ #include /* For glibc compatibility. An empty enum does not compile. */ -#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \ +#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || \ __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 /** * enum net_device_flags - &struct net_device flags @@ -99,7 +99,7 @@ enum net_device_flags { IFF_ECHO = 1<<18, /* volatile */ #endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */ }; -#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ +#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 || __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */ /* for compatibility with glibc net/if.h */ #if __UAPI_DEF_IF_NET_DEVICE_FLAGS From dcb17d22e1c2cd72e72190c736349a675362b3bc Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 5 Dec 2016 18:37:13 -0200 Subject: [PATCH 25/34] tcp: warn on bogus MSS and try to amend it There have been some reports lately about TCP connection stalls caused by NIC drivers that aren't setting gso_size on aggregated packets on rx path. This causes TCP to assume that the MSS is actually the size of the aggregated packet, which is invalid. Although the proper fix is to be done at each driver, it's often hard and cumbersome for one to debug, come to such root cause and report/fix it. This patch amends this situation in two ways. First, it adds a warning on when this situation occurs, so it gives a hint to those trying to debug this. It also limit the maximum probed MSS to the adverised MSS, as it should never be any higher than that. The result is that the connection may not have the best performance ever but it shouldn't stall, and the admin will have a hint on what to look for. Tested with virtio by forcing gso_size to 0. v2: updated msg per David's suggestion v3: use skb_iif to find the interface and also log its name, per Eric Dumazet's suggestion. As the skb may be backlogged and the interface gone by then, we need to check if the number still has a meaning. v4: use helper tcp_gro_dev_warn() and avoid pr_warn_once inside __once, per David's suggestion Cc: Jonathan Maxwell Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a27b9c0e27c0..c71d49ce0c93 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -128,6 +128,23 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +{ + static bool __once __read_mostly; + + if (!__once) { + struct net_device *dev; + + __once = true; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); + rcu_read_unlock(); + } +} + /* Adapt the MSS value used to make delayed ack decision to the * real world. */ @@ -144,7 +161,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ len = skb_shinfo(skb)->gso_size ? : skb->len; if (len >= icsk->icsk_ack.rcv_mss) { - icsk->icsk_ack.rcv_mss = len; + icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, + tcp_sk(sk)->advmss); + if (unlikely(icsk->icsk_ack.rcv_mss != len)) + tcp_gro_dev_warn(sk, skb); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. From e37e2ff350a321ad9c36b588e76f34fbba305be6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 5 Dec 2016 18:10:58 -0800 Subject: [PATCH 26/34] virtio-net: Fix DMA-from-the-stack in virtnet_set_mac_address() With CONFIG_VMAP_STACK=y, virtnet_set_mac_address() can be passed a pointer to the stack and it will OOPS. Copy the address to the heap to prevent the crash. Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Laura Abbott Reported-by: zbyszek@in.waw.pl Signed-off-by: Andy Lutomirski Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7276d5a95bd0..cbf1c613c67a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -969,12 +969,17 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) struct virtnet_info *vi = netdev_priv(dev); struct virtio_device *vdev = vi->vdev; int ret; - struct sockaddr *addr = p; + struct sockaddr *addr; struct scatterlist sg; - ret = eth_prepare_mac_addr_change(dev, p); + addr = kmalloc(sizeof(*addr), GFP_KERNEL); + if (!addr) + return -ENOMEM; + memcpy(addr, p, sizeof(*addr)); + + ret = eth_prepare_mac_addr_change(dev, addr); if (ret) - return ret; + goto out; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) { sg_init_one(&sg, addr->sa_data, dev->addr_len); @@ -982,7 +987,8 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) { dev_warn(&vdev->dev, "Failed to set mac address by vq command.\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) && !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { @@ -996,8 +1002,11 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p) } eth_commit_mac_addr_change(dev, p); + ret = 0; - return 0; +out: + kfree(addr); + return ret; } static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, From d14584d91976c42c7178164665c4959495740939 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Tue, 6 Dec 2016 00:33:50 -0500 Subject: [PATCH 27/34] be2net: Add DEVSEC privilege to SET_HSW_CONFIG command. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OPCODE_COMMON_GET_FN_PRIVILEGES is returning only DEVSEC privilege (Unrestricted Administrative Privilege) for Lancer NIC functions. So, driver is failing SET_HSW_CONFIG command, as DEVSEC privilege was not set in the privilege bitmap. This patch fixes the problem by setting DEVSEC privilege in SET_HSW_CONFIG’s privilege bitmap. Signed-off-by: Venkat Duvvuru Signed-off-by: Suresh Reddy Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 1fb5d7239254..0e74529a4209 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -90,7 +90,8 @@ static struct be_cmd_priv_map cmd_priv_map[] = { { OPCODE_COMMON_SET_HSW_CONFIG, CMD_SUBSYSTEM_COMMON, - BE_PRIV_DEVCFG | BE_PRIV_VHADM + BE_PRIV_DEVCFG | BE_PRIV_VHADM | + BE_PRIV_DEVSEC }, { OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, From f85de6666347c974cdf97b1026180995d912d7d0 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Tue, 6 Dec 2016 09:26:53 +0300 Subject: [PATCH 28/34] net: fec: fix compile with CONFIG_M5272 Commit 80cca775cdc4 ("net: fec: cache statistics while device is down") introduced unconditional statistics-related actions. However, when driver is compiled with CONFIG_M5272, staticsics-related definitions do not exist, which results into build errors. Fix that by adding explicit handling of !defined(CONFIG_M5272) case. Fixes: 80cca775cdc4 ("net: fec: cache statistics while device is down") Signed-off-by: Nikita Yushchenko Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 5f77caa59534..12aef1b15356 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2313,6 +2313,8 @@ static const struct fec_stat { { "IEEE_rx_octets_ok", IEEE_R_OCTETS_OK }, }; +#define FEC_STATS_SIZE (ARRAY_SIZE(fec_stats) * sizeof(u64)) + static void fec_enet_update_ethtool_stats(struct net_device *dev) { struct fec_enet_private *fep = netdev_priv(dev); @@ -2330,7 +2332,7 @@ static void fec_enet_get_ethtool_stats(struct net_device *dev, if (netif_running(dev)) fec_enet_update_ethtool_stats(dev); - memcpy(data, fep->ethtool_stats, ARRAY_SIZE(fec_stats) * sizeof(u64)); + memcpy(data, fep->ethtool_stats, FEC_STATS_SIZE); } static void fec_enet_get_strings(struct net_device *netdev, @@ -2355,6 +2357,12 @@ static int fec_enet_get_sset_count(struct net_device *dev, int sset) return -EOPNOTSUPP; } } + +#else /* !defined(CONFIG_M5272) */ +#define FEC_STATS_SIZE 0 +static inline void fec_enet_update_ethtool_stats(struct net_device *dev) +{ +} #endif /* !defined(CONFIG_M5272) */ static int fec_enet_nway_reset(struct net_device *dev) @@ -3293,8 +3301,7 @@ fec_probe(struct platform_device *pdev) /* Init network device */ ndev = alloc_etherdev_mqs(sizeof(struct fec_enet_private) + - ARRAY_SIZE(fec_stats) * sizeof(u64), - num_tx_qs, num_rx_qs); + FEC_STATS_SIZE, num_tx_qs, num_rx_qs); if (!ndev) return -ENOMEM; From f663ad98623926b8d7bdef4b4648d10c0229aebe Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 6 Dec 2016 17:32:43 +0200 Subject: [PATCH 29/34] net/mlx5: Verify module parameters Verify the mlx5_core module parameters by making sure that they are in the expected range and if they aren't restore them to their default values. Fixes: 9603b61de1ee ('mlx5: Move pci device handling from mlx5_ib to mlx5_core') Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/main.c | 27 +++++++++++-------- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3b7c6a9f2b5f..22eb3be06651 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -62,13 +62,13 @@ MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRIVER_VERSION); -int mlx5_core_debug_mask; -module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644); +unsigned int mlx5_core_debug_mask; +module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); #define MLX5_DEFAULT_PROF 2 -static int prof_sel = MLX5_DEFAULT_PROF; -module_param_named(prof_sel, prof_sel, int, 0444); +static unsigned int prof_sel = MLX5_DEFAULT_PROF; +module_param_named(prof_sel, prof_sel, uint, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); enum { @@ -1227,13 +1227,6 @@ static int init_one(struct pci_dev *pdev, dev->pdev = pdev; dev->event = mlx5_core_event; - - if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { - mlx5_core_warn(dev, - "selected profile out of range, selecting default (%d)\n", - MLX5_DEFAULT_PROF); - prof_sel = MLX5_DEFAULT_PROF; - } dev->profile = &profile[prof_sel]; INIT_LIST_HEAD(&priv->ctx_list); @@ -1450,10 +1443,22 @@ static struct pci_driver mlx5_core_driver = { .sriov_configure = mlx5_core_sriov_configure, }; +static void mlx5_core_verify_params(void) +{ + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n", + prof_sel, + ARRAY_SIZE(profile) - 1, + MLX5_DEFAULT_PROF); + prof_sel = MLX5_DEFAULT_PROF; + } +} + static int __init init(void) { int err; + mlx5_core_verify_params(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 187662c8ea96..20d16b137e03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -44,7 +44,7 @@ #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) -extern int mlx5_core_debug_mask; +extern uint mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ From 9e5b2fc1d39b3122e2028849d0edc5df1d1a4761 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 6 Dec 2016 17:32:44 +0200 Subject: [PATCH 30/34] net/mlx5: Remove duplicate pci dev name print Remove duplicate pci dev name printing from mlx5_core_warn/dbg. Fixes: 5a7883989b1c ('net/mlx5_core: Improve mlx5 messages') Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 20d16b137e03..2ce03464e622 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -47,8 +47,8 @@ extern uint mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ - dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ - (__dev)->priv.name, __func__, __LINE__, current->pid, \ + dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) #define mlx5_core_dbg_mask(__dev, mask, format, ...) \ @@ -63,8 +63,8 @@ do { \ ##__VA_ARGS__) #define mlx5_core_warn(__dev, format, ...) \ - dev_warn(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ - (__dev)->priv.name, __func__, __LINE__, current->pid, \ + dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) #define mlx5_core_info(__dev, format, ...) \ From f9c14e46748be9a2adafdb7d216f6cdeb435aadc Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 6 Dec 2016 17:32:45 +0200 Subject: [PATCH 31/34] net/mlx5: Fix query ISSI flow In old FWs query ISSI command is not supported and for some of those FWs it might fail with status other than "MLX5_CMD_STAT_BAD_OP_ERR". In such case instead of failing the driver load, we will treat any FW status other than 0 for Query ISSI FW command as ISSI not supported and assume ISSI=0 (most basic driver/FW interface). In case of driver syndrom (query ISSI failure by driver) we will fail driver load. Fixes: f62b8bb8f2d3 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Kamal Heib Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 5 ----- drivers/net/ethernet/mellanox/mlx5/core/main.c | 15 +++++++++------ .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++++ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1e639f886021..bfe410e8a469 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -268,11 +268,6 @@ static void dump_buf(void *buf, int size, int data_only, int offset) pr_debug("\n"); } -enum { - MLX5_DRIVER_STATUS_ABORTED = 0xfe, - MLX5_DRIVER_SYND = 0xbadd00de, -}; - static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, u32 *synd, u8 *status) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 22eb3be06651..ada24e103b02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -732,13 +732,15 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) u8 status; mlx5_cmd_mbox_status(query_out, &status, &syndrome); - if (status == MLX5_CMD_STAT_BAD_OP_ERR) { - pr_debug("Only ISSI 0 is supported\n"); - return 0; + if (!status || syndrome == MLX5_DRIVER_SYND) { + mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", + err, status, syndrome); + return err; } - pr_err("failed to query ISSI err(%d)\n", err); - return err; + mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n"); + dev->issi = 0; + return 0; } sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); @@ -752,7 +754,8 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out)); if (err) { - pr_err("failed to set ISSI=1 err(%d)\n", err); + mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n", + err); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 2ce03464e622..63b9a0dba885 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -75,6 +75,11 @@ enum { MLX5_CMD_TIME, /* print command execution time */ }; +enum { + MLX5_DRIVER_STATUS_ABORTED = 0xfe, + MLX5_DRIVER_SYND = 0xbadd00de, +}; + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); From b8335d91b472289939e26428dfa88c54aee3b739 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 6 Dec 2016 17:32:46 +0200 Subject: [PATCH 32/34] net/mlx5e: Don't notify HW when filling the edge of ICO SQ We are going to do this a couple of steps ahead anyway. Fixes: d3c9bc2743dc ("net/mlx5e: Added ICO SQs") Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index c6de6fba5843..e9abb6dfe393 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -340,7 +340,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, true); + mlx5e_send_nop(sq, false); } wqe = mlx5_wq_cyc_get_wqe(wq, pi); From 3c8591d593a3da9ae8e8342acb1f6ab9ab478e92 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 6 Dec 2016 17:32:47 +0200 Subject: [PATCH 33/34] net/mlx5e: Don't flush SQ on error We are doing SQ descriptors cleanup in driver. Fixes: 6e8dd6d6f4bd ("net/mlx5e: Don't wait for SQ completions on close") Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 84e8b250e2af..5bf7f86fe31c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1006,7 +1006,6 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); - MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, sq->uar.index); From c0f1147d14e4b09018a495c5095094e5707a4f44 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 6 Dec 2016 17:32:48 +0200 Subject: [PATCH 34/34] net/mlx5e: Change the SQ/RQ operational state to positive logic When using the negative logic (i.e. FLUSH state), after the RQ/SQ reopen we will have a time interval that the RQ/SQ is not really ready and the state indicates that its not in FLUSH state because the initial SQ/RQ struct memory starts as zeros. Now we changed the state to indicate if the SQ/RQ is opened and we will set the READY state after finishing preparing all the SQ/RQ resources. Fixes: 6e8dd6d6f4bd ("net/mlx5e: Don't wait for SQ completions on close") Fixes: f2fde18c52a7 ("net/mlx5e: Don't wait for RQ completions on close") Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 14 +++++++++----- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4 ++-- 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7a43502a89cc..71382df59fc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -241,7 +241,7 @@ struct mlx5e_tstamp { }; enum { - MLX5E_RQ_STATE_FLUSH, + MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, MLX5E_RQ_STATE_AM, }; @@ -394,7 +394,7 @@ struct mlx5e_sq_dma { }; enum { - MLX5E_SQ_STATE_FLUSH, + MLX5E_SQ_STATE_ENABLED, MLX5E_SQ_STATE_BF_ENABLE, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5bf7f86fe31c..246d98ebb588 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -759,6 +759,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; @@ -773,6 +774,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, return 0; err_disable_rq: + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); @@ -782,7 +784,7 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { - set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state); + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ cancel_work_sync(&rq->am.work); @@ -1082,6 +1084,7 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_destroy_sq; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, false, 0); if (err) @@ -1095,6 +1098,7 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, return 0; err_disable_sq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_destroy_sq(sq); @@ -1111,7 +1115,7 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); /* prevent netif_tx_wake_queue */ napi_synchronize(&sq->channel->napi); @@ -3091,7 +3095,7 @@ static void mlx5e_tx_timeout(struct net_device *dev) if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; sched_work = true; - set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); } @@ -3146,13 +3150,13 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) for (i = 0; i < priv->params.num_channels; i++) { struct mlx5e_channel *c = priv->channel[i]; - set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); napi_synchronize(&c->napi); /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ old_prog = xchg(&c->rq.xdp_prog, prog); - clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); /* napi_schedule in case we have missed anything */ set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); napi_schedule(&c->napi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e9abb6dfe393..33495d88aeb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -412,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) { mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } @@ -445,7 +445,7 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } #define RQ_CANNOT_POST(rq) \ - (test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \ + (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \ test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) @@ -924,7 +924,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; - if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return 0; if (cq->decmprs_left) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 70a717382357..cfb68371c397 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -409,7 +409,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_sq, cq); - if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; npkts = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 5703f19a6a24..e5c12a732aa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -56,7 +56,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) struct mlx5_cqe64 *cqe; u16 sqcc; - if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return; cqe = mlx5e_get_cqe(cq); @@ -113,7 +113,7 @@ static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) sq = container_of(cq, struct mlx5e_sq, cq); - if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),