From a82c25c366b0963d33ddf699196e6cf57f6d89b1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Mar 2022 13:52:11 +0100 Subject: [PATCH 01/26] Revert "netfilter: nat: force port remap to prevent shadowing well-known ports" This reverts commit 878aed8db324bec64f3c3f956e64d5ae7375a5de. This change breaks existing setups where conntrack is used with asymmetric paths. In these cases, the NAT transformation occurs on the syn-ack instead of the syn: 1. SYN x:12345 -> y -> 443 // sent by initiator, received by responder 2. SYNACK y:443 -> x:12345 // First packet seen by conntrack, as sent by responder 3. tuple_force_port_remap() gets called, sees: 'tcp from 443 to port 12345 NAT' -> pick a new source port, initiator receives 4. SYNACK y:$RANDOM -> x:12345 // connection is never established While it's possible to avoid the breakage with NOTRACK rules, a kernel update should not break working setups. An alternative to the revert is to augment conntrack to tag mid-stream connections plus more code in the nat core to skip NAT for such connections, however, this leads to more interaction/integration between conntrack and NAT. Therefore, revert, users will need to add explicit nat rules to avoid port shadowing. 
Link: https://lore.kernel.org/netfilter-devel/20220302105908.GA5852@breakpoint.cc/#R Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2051413 Signed-off-by: Florian Westphal --- net/netfilter/nf_nat_core.c | 43 ++------------------ tools/testing/selftests/netfilter/nft_nat.sh | 5 +-- 2 files changed, 5 insertions(+), 43 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 2d06a66899b2..ffcf6529afc3 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -494,38 +494,6 @@ another_round: goto another_round; } -static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple) -{ - u16 sp, dp; - - switch (tuple->dst.protonum) { - case IPPROTO_TCP: - sp = ntohs(tuple->src.u.tcp.port); - dp = ntohs(tuple->dst.u.tcp.port); - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - sp = ntohs(tuple->src.u.udp.port); - dp = ntohs(tuple->dst.u.udp.port); - break; - default: - return false; - } - - /* IANA: System port range: 1-1023, - * user port range: 1024-49151, - * private port range: 49152-65535. - * - * Linux default ephemeral port range is 32768-60999. - * - * Enforce port remapping if sport is significantly lower - * than dport to prevent NAT port shadowing, i.e. - * accidental match of 'new' inbound connection vs. - * existing outbound one. - */ - return sp < 16384 && dp >= 32768; -} - /* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, * we change the source to map into the range. 
For NF_INET_PRE_ROUTING * and NF_INET_LOCAL_OUT, we change the destination to map into the @@ -539,17 +507,11 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { - bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL; const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); zone = nf_ct_zone(ct); - if (maniptype == NF_NAT_MANIP_SRC && - !random_port && - !ct->local_origin) - random_port = tuple_force_port_remap(orig_tuple); - /* 1) If this srcip/proto/src-proto-part is currently mapped, * and that same mapping gives a unique tuple within the given * range, use that. @@ -558,7 +520,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, * So far, we don't do local source mappings, so multiple * manips not an issue. */ - if (maniptype == NF_NAT_MANIP_SRC && !random_port) { + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { /* try the original tuple first */ if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -582,7 +545,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, */ /* Only bother mapping if it's not already in range and unique */ - if (!random_port) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) { if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 79fe627b9e81..eb8543b9a5c4 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -880,9 +880,8 @@ EOF return $ksft_skip fi - # test default behaviour. Packet from ns1 to ns0 is not redirected - # due to automatic port translation. - test_port_shadow "default" "ROUTER" + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. 
+ test_port_shadow "default" "CLIENT" # test packet filter based mitigation: prevent forwarding of # packets claiming to come from the service port. From ee0a4dc9f317fb9a97f20037d219802ca8de939b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Mar 2022 17:28:38 +0100 Subject: [PATCH 02/26] Revert "netfilter: conntrack: tag conntracks picked up in local out hook" This was a prerequisite for the ill-fated "netfilter: nat: force port remap to prevent shadowing well-known ports". As this has been reverted, this change can be backed out too. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_conntrack.h | 1 - net/netfilter/nf_conntrack_core.c | 3 --- 2 files changed, 4 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 8731d5bcb47d..b08b70989d2c 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -97,7 +97,6 @@ struct nf_conn { unsigned long status; u16 cpu; - u16 local_origin:1; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d6aa5b47031e..bf1e17c678f1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1748,9 +1748,6 @@ resolve_normal_ct(struct nf_conn *tmpl, return 0; if (IS_ERR(h)) return PTR_ERR(h); - - ct = nf_ct_tuplehash_to_ctrack(h); - ct->local_origin = state->hook == NF_INET_LOCAL_OUT; } ct = nf_ct_tuplehash_to_ctrack(h); From 9a564bccb78a76740ea9d75a259942df8143d02c Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Tue, 8 Mar 2022 11:20:28 +0800 Subject: [PATCH 03/26] af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register Add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register to initialize the buffer of supp_skb to fix a kernel-info-leak issue. 1) Function pfkey_register calls compose_sadb_supported to request a sk_buff. 
2) compose_sadb_supported calls alloc_skb to allocate a sk_buff, but it doesn't zero it. 3) If auth_len is greater than 0, then compose_sadb_supported treats the memory as a struct sadb_supported and begins to initialize. But it just initializes the field sadb_supported_len and field sadb_supported_exttype without field sadb_supported_reserved. Reported-by: TCS Robot Signed-off-by: Haimin Zhang Signed-off-by: Steffen Klassert --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 9bf52a09b5ff..fd51db3be91c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1699,7 +1699,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad xfrm_probe_algs(); - supp_skb = compose_sadb_supported(hdr, GFP_KERNEL); + supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); From 5e34af4142ffe68f01c8a9acae83300f8911e20c Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 10 Mar 2022 15:25:38 -0800 Subject: [PATCH 04/26] net: ipv6: fix skb_over_panic in __ip6_append_data Syzbot found a kernel bug in the ipv6 stack: LINK: https://syzkaller.appspot.com/bug?id=205d6f11d72329ab8d62a610c44c5e7e25415580 The reproducer triggers it by sending a crafted message via sendmmsg() call, which triggers skb_over_panic, and crashes the kernel: skbuff: skb_over_panic: text:ffffffff84647fb4 len:65575 put:65575 head:ffff888109ff0000 data:ffff888109ff0088 tail:0x100af end:0xfec0 dev:<NULL> Update the check that prevents an invalid packet with MTU equal to the fragment header size to eat up all the space for payload. 
The reproducer can be found here: LINK: https://syzkaller.appspot.com/text?tag=ReproC&x=1648c83fb00000 Reported-by: syzbot+e223cf47ec8ae183f2a0@syzkaller.appspotmail.com Signed-off-by: Tadeusz Struk Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20220310232538.1044947-1-tadeusz.struk@linaro.org Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4788f6b37053..194832663d85 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1476,8 +1476,8 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (mtu < fragheaderlen || - ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) + if (mtu <= fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) goto emsgsize; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - From 46b348fd2d81a341b15fb3f3f986204b038f5c42 Mon Sep 17 00:00:00 2001 From: Niels Dossche Date: Fri, 11 Mar 2022 00:27:08 +0100 Subject: [PATCH 05/26] alx: acquire mutex for alx_reinit in alx_change_mtu alx_reinit has a lockdep assertion that the alx->mtx mutex must be held. alx_reinit is called from two places: alx_reset and alx_change_mtu. alx_reset does acquire alx->mtx before calling alx_reinit. alx_change_mtu does not acquire this mutex, nor do its callers or any path towards alx_change_mtu. Acquire the mutex in alx_change_mtu. The issue was introduced when the fine-grained locking was introduced to the code to replace the RTNL. The same commit also introduced the lockdep assertion. 
Fixes: 4a5fe57e7751 ("alx: use fine-grained locking instead of RTNL") Signed-off-by: Niels Dossche Link: https://lore.kernel.org/r/20220310232707.44251-1-dossche.niels@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/alx/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 4ad3fc72e74e..a89b93cb4e26 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1181,8 +1181,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); netdev_update_features(netdev); - if (netif_running(netdev)) + if (netif_running(netdev)) { + mutex_lock(&alx->mtx); alx_reinit(alx); + mutex_unlock(&alx->mtx); + } return 0; } From 8e6ed963763fe21429eabfc76c69ce2b0163a3dd Mon Sep 17 00:00:00 2001 From: Jiyong Park Date: Fri, 11 Mar 2022 11:00:16 +0900 Subject: [PATCH 06/26] vsock: each transport cycles only on its own sockets When iterating over sockets using vsock_for_each_connected_socket, make sure that a transport filters out sockets that don't belong to the transport. There actually was an issue caused by this; in a nested VM configuration, destroying the nested VM (which often involves the closing of /dev/vhost-vsock if there was h2g connections to the nested VM) kills not only the h2g connections, but also all existing g2h connections to the (outmost) host which are totally unrelated. Tested: Executed the following steps on Cuttlefish (Android running on a VM) [1]: (1) Enter into an `adb shell` session - to have a g2h connection inside the VM, (2) open and then close /dev/vhost-vsock by `exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb session is not reset. 
[1] https://android.googlesource.com/device/google/cuttlefish/ Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: Jiyong Park Link: https://lore.kernel.org/r/20220311020017.1509316-1-jiyong@google.com Signed-off-by: Jakub Kicinski --- drivers/vhost/vsock.c | 3 ++- include/net/af_vsock.h | 3 ++- net/vmw_vsock/af_vsock.c | 9 +++++++-- net/vmw_vsock/virtio_transport.c | 7 +++++-- net/vmw_vsock/vmci_transport.c | 5 ++++- 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 37f0b4274113..e6c9d41db1de 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) /* Iterating over all connections for all CIDs to find orphans is * inefficient. Room for improvement here. */ - vsock_for_each_connected_socket(vhost_vsock_reset_orphans); + vsock_for_each_connected_socket(&vhost_transport.transport, + vhost_vsock_reset_orphans); /* Don't check the owner, because we are in the release path, so we * need to stop the vsock device in any case. 
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index ab207677e0a8..f742e50207fb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_remove_sock(struct vsock_sock *vsk); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 38baeb189d4e..f04abf662ec6 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_remove_sock); -void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) +void vsock_for_each_connected_socket(struct vsock_transport *transport, + void (*fn)(struct sock *sk)) { int i; @@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) { struct vsock_sock *vsk; list_for_each_entry(vsk, &vsock_connected_table[i], - connected_table) + connected_table) { + if (vsk->transport != transport) + continue; + fn(sk_vsock(vsk)); + } } spin_unlock_bh(&vsock_table_lock); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index fb3302fff627..5afc194a58bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -24,6 +24,7 @@ static struct workqueue_struct *virtio_vsock_workqueue; static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ +static struct virtio_transport virtio_transport; /* forward declaration */ struct virtio_vsock { 
struct virtio_device *vdev; @@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock *vsock, switch (le32_to_cpu(event->id)) { case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: virtio_vsock_update_guest_cid(vsock); - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); break; } } @@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev) synchronize_rcu(); /* Reset all connected sockets when the device disappear */ - vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vsock_for_each_connected_socket(&virtio_transport.transport, + virtio_vsock_reset_sock); /* Stop all work handlers to make sure no one is accessing the device, * so we can safely call virtio_reset_device(). diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7aef34e32bdf..b17dc9745188 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -75,6 +75,8 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; static int PROTOCOL_OVERRIDE = -1; +static struct vsock_transport vmci_transport; /* forward declaration */ + /* Helper function to convert from a VMCI error code to a VSock error code. 
*/ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) @@ -882,7 +884,8 @@ static void vmci_transport_qp_resumed_cb(u32 sub_id, const struct vmci_event_data *e_data, void *client_data) { - vsock_for_each_connected_socket(vmci_transport_handle_detach); + vsock_for_each_connected_socket(&vmci_transport, + vmci_transport_handle_detach); } static void vmci_transport_recv_pkt_work(struct work_struct *work) From ed5f85d4229010235eab1e3d9acf6970d9304963 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 12 Mar 2022 11:05:46 +0100 Subject: [PATCH 07/26] netfilter: nf_tables: disable register tracking The register tracking infrastructure is incomplete, it might lead to generating incorrect ruleset bytecode, disable it by now given we are late in the release process. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c86748b3873b..d71a33ae39b3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8260,6 +8260,12 @@ void nf_tables_trans_destroy_flush_work(void) } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); +static bool nft_expr_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + return false; +} + static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { const struct nft_expr *expr, *last; @@ -8307,8 +8313,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; - if (expr->ops->reduce && - expr->ops->reduce(&track, expr)) { + if (nft_expr_reduce(&track, expr)) { expr = track.cur; continue; } From e981bc74aefc6a177b50c16cfa7023599799cf74 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 11 Mar 2022 13:17:16 +0200 Subject: [PATCH 08/26] net: dsa: microchip: add spi_device_id tables Add spi_device_id tables to avoid 
logs like "SPI driver ksz9477-switch has no spi_device_id". Signed-off-by: Claudiu Beznea Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795_spi.c | 11 +++++++++++ drivers/net/dsa/microchip/ksz9477_spi.c | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c index 866767b70d65..b0a7dee27ffc 100644 --- a/drivers/net/dsa/microchip/ksz8795_spi.c +++ b/drivers/net/dsa/microchip/ksz8795_spi.c @@ -124,12 +124,23 @@ static const struct of_device_id ksz8795_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz8795_dt_ids); +static const struct spi_device_id ksz8795_spi_ids[] = { + { "ksz8765" }, + { "ksz8794" }, + { "ksz8795" }, + { "ksz8863" }, + { "ksz8873" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz8795_spi_ids); + static struct spi_driver ksz8795_spi_driver = { .driver = { .name = "ksz8795-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz8795_dt_ids), }, + .id_table = ksz8795_spi_ids, .probe = ksz8795_spi_probe, .remove = ksz8795_spi_remove, .shutdown = ksz8795_spi_shutdown, diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index e3cb0e6c9f6f..43addeabfc25 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -98,12 +98,24 @@ static const struct of_device_id ksz9477_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); +static const struct spi_device_id ksz9477_spi_ids[] = { + { "ksz9477" }, + { "ksz9897" }, + { "ksz9893" }, + { "ksz9563" }, + { "ksz8563" }, + { "ksz9567" }, + { }, +}; +MODULE_DEVICE_TABLE(spi, ksz9477_spi_ids); + static struct spi_driver ksz9477_spi_driver = { .driver = { .name = "ksz9477-switch", .owner = THIS_MODULE, .of_match_table = of_match_ptr(ksz9477_dt_ids), }, + .id_table = ksz9477_spi_ids, .probe = ksz9477_spi_probe, .remove = ksz9477_spi_remove, .shutdown = ksz9477_spi_shutdown, From 4db4075f92af2b28f415fc979ab626e6b37d67b6 Mon Sep 
17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 10 Mar 2022 11:49:00 +0100 Subject: [PATCH 09/26] esp6: fix check on ipv6_skip_exthdr's return value Commit 5f9c55c8066b ("ipv6: check return value of ipv6_skip_exthdr") introduced an incorrect check, which leads to all ESP packets over either TCPv6 or UDPv6 encapsulation being dropped. In this particular case, offset is negative, since skb->data points to the ESP header in the following chain of headers, while skb->network_header points to the IPv6 header: IPv6 | ext | ... | ext | UDP | ESP | ... That doesn't seem to be a problem, especially considering that if we reach esp6_input_done2, we're guaranteed to have a full set of headers available (otherwise the packet would have been dropped earlier in the stack). However, it means that the return value will (intentionally) be negative. We can make the test more specific, as the expected return value of ipv6_skip_exthdr will be the (negated) size of either a UDP header, or a TCP header with possible options. In the future, we should probably either make ipv6_skip_exthdr explicitly accept negative offsets (and adjust its return value for error cases), or make ipv6_skip_exthdr only take non-negative offsets (and audit all callers). 
Fixes: 5f9c55c8066b ("ipv6: check return value of ipv6_skip_exthdr") Reported-by: Xiumei Mu Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b0ffbcd5432d..55d604c9b3b3 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -812,8 +812,7 @@ int esp6_input_done2(struct sk_buff *skb, int err) struct tcphdr *th; offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); - - if (offset < 0) { + if (offset == -1) { err = -EINVAL; goto out; } From 837d9e49402eaf030db55a49f96fc51d73b4b441 Mon Sep 17 00:00:00 2001 From: Kurt Cancemi Date: Sat, 12 Mar 2022 15:15:13 -0500 Subject: [PATCH 10/26] net: phy: marvell: Fix invalid comparison in the resume and suspend functions This bug resulted in only the current mode being resumed and suspended when the PHY supported both fiber and copper modes and when the PHY only supported copper mode the fiber mode would incorrectly be attempted to be resumed and suspended. 
Fixes: 3758be3dc162 ("Marvell phy: add functions to suspend and resume both interfaces: fiber and copper links.") Signed-off-by: Kurt Cancemi Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220312201512.326047-1-kurt@x64architecture.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/marvell.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2429db614b59..2702faf7b0f6 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1687,8 +1687,8 @@ static int marvell_suspend(struct phy_device *phydev) int err; /* Suspend the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; @@ -1722,8 +1722,8 @@ static int marvell_resume(struct phy_device *phydev) int err; /* Resume the fiber mode first */ - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, - phydev->supported)) { + if (linkmode_test_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, + phydev->supported)) { err = marvell_set_page(phydev, MII_MARVELL_FIBER_PAGE); if (err < 0) goto error; From 0f8946ae704ac6880c590beb91bc3a732595a28a Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Sat, 12 Mar 2022 23:41:40 +0100 Subject: [PATCH 11/26] net: mdio: mscc-miim: fix duplicate debugfs entry This driver can have up to two regmaps. If the second one is registered its debugfs entry will have the same name as the first one and the following error will be printed: [ 3.833521] debugfs: Directory 'e200413c.mdio' with parent 'regmap' already present! Give the second regmap a name to avoid this. 
Fixes: a27a76282837 ("net: mdio: mscc-miim: convert to a regmap implementation") Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220312224140.4173930-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-mscc-miim.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-mscc-miim.c b/drivers/net/mdio/mdio-mscc-miim.c index 7d2abaf2b2c9..64fb76c1e395 100644 --- a/drivers/net/mdio/mdio-mscc-miim.c +++ b/drivers/net/mdio/mdio-mscc-miim.c @@ -187,6 +187,13 @@ static const struct regmap_config mscc_miim_regmap_config = { .reg_stride = 4, }; +static const struct regmap_config mscc_miim_phy_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .name = "phy", +}; + int mscc_miim_setup(struct device *dev, struct mii_bus **pbus, const char *name, struct regmap *mii_regmap, int status_offset) { @@ -250,7 +257,7 @@ static int mscc_miim_probe(struct platform_device *pdev) } phy_regmap = devm_regmap_init_mmio(&pdev->dev, phy_regs, - &mscc_miim_regmap_config); + &mscc_miim_phy_regmap_config); if (IS_ERR(phy_regmap)) { dev_err(&pdev->dev, "Unable to create phy register regmap\n"); return PTR_ERR(phy_regmap); From c700525fcc06b05adfea78039de02628af79e07a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 12 Mar 2022 15:29:58 -0800 Subject: [PATCH 12/26] net/packet: fix slab-out-of-bounds access in packet_recvmsg() syzbot found that when an AF_PACKET socket is using PACKET_COPY_THRESH and mmap operations, tpacket_rcv() is queueing skbs with garbage in skb->cb[], triggering a too big copy [1] Presumably, users of af_packet using mmap() already gets correct metadata from the mapped buffer, we can simply make sure to clear 12 bytes that might be copied to user space later. 
BUG: KASAN: stack-out-of-bounds in memcpy include/linux/fortify-string.h:225 [inline] BUG: KASAN: stack-out-of-bounds in packet_recvmsg+0x56c/0x1150 net/packet/af_packet.c:3489 Write of size 165 at addr ffffc9000385fb78 by task syz-executor233/3631 CPU: 0 PID: 3631 Comm: syz-executor233 Not tainted 5.17.0-rc7-syzkaller-02396-g0b3660695e80 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xf/0x336 mm/kasan/report.c:255 __kasan_report mm/kasan/report.c:442 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 memcpy+0x39/0x60 mm/kasan/shadow.c:66 memcpy include/linux/fortify-string.h:225 [inline] packet_recvmsg+0x56c/0x1150 net/packet/af_packet.c:3489 sock_recvmsg_nosec net/socket.c:948 [inline] sock_recvmsg net/socket.c:966 [inline] sock_recvmsg net/socket.c:962 [inline] ____sys_recvmsg+0x2c4/0x600 net/socket.c:2632 ___sys_recvmsg+0x127/0x200 net/socket.c:2674 __sys_recvmsg+0xe2/0x1a0 net/socket.c:2704 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fdfd5954c29 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffcf8e71e48 EFLAGS: 00000246 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fdfd5954c29 RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000005 RBP: 0000000000000000 R08: 000000000000000d R09: 000000000000000d R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffcf8e71e60 R13: 00000000000f4240 R14: 000000000000c1ff R15: 00007ffcf8e71e54 addr 
ffffc9000385fb78 is located in stack of task syz-executor233/3631 at offset 32 in frame: ____sys_recvmsg+0x0/0x600 include/linux/uio.h:246 this frame has 1 object: [32, 160) 'addr' Memory state around the buggy address: ffffc9000385fa80: 00 04 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 ffffc9000385fb00: 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 >ffffc9000385fb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f3 ^ ffffc9000385fc00: f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 f1 ffffc9000385fc80: f1 f1 f1 00 f2 f2 f2 00 f2 f2 f2 00 00 00 00 00 ================================================================== Fixes: 0fb375fb9b93 ("[AF_PACKET]: Allow for > 8 byte hardware addresses.") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220312232958.3535620-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ab87f22cc7ec..a7273af2d900 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2317,8 +2317,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -3462,6 +3465,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3484,6 +3489,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + 
msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } From 0f74b29a4f53627376cf5a5fb7b0b3fa748a0b2b Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Mon, 14 Mar 2022 09:34:48 +0800 Subject: [PATCH 13/26] atm: eni: Add check for dma_map_single Since dma_map_single() can fail, it is better to check its result and return an error if the mapping fails. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jiasheng Jiang Signed-off-by: David S. Miller --- drivers/atm/eni.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 422753d52244..a31ffe16e626 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1112,6 +1112,8 @@ DPRINTK("iovcnt = %d\n",skb_shinfo(skb)->nr_frags); skb_data3 = skb->data[3]; paddr = dma_map_single(&eni_dev->pci_dev->dev,skb->data,skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&eni_dev->pci_dev->dev, paddr)) + return enq_next; ENI_PRV_PADDR(skb) = paddr; /* prepare DMA queue entries */ j = 0; From e9c14b59ea2ec19afe22d60b07583b7e08c74290 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Mar 2022 15:28:19 -0700 Subject: [PATCH 14/26] Add Paolo Abeni to networking maintainers Growing the network maintainers team from 2 to 3. Signed-off-by: David S. Miller Link: https://lore.kernel.org/r/20220314222819.958428-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e127c2fb08a7..cd0f68d4a34a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13376,6 +13376,7 @@ F: net/core/drop_monitor.c NETWORKING DRIVERS M: "David S. Miller" M: Jakub Kicinski +M: Paolo Abeni L: netdev@vger.kernel.org S: Maintained Q: https://patchwork.kernel.org/project/netdevbpf/list/ @@ -13422,6 +13423,7 @@ F: tools/testing/selftests/drivers/net/dsa/ NETWORKING [GENERAL] M: "David S. 
Miller" M: Jakub Kicinski +M: Paolo Abeni L: netdev@vger.kernel.org S: Maintained Q: https://patchwork.kernel.org/project/netdevbpf/list/ From f153546913bada41a811722f2c6d17c3243a0333 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Mon, 7 Mar 2022 18:47:39 +0100 Subject: [PATCH 15/26] ice: fix NULL pointer dereference in ice_update_vsi_tx_ring_stats() It is possible to do NULL pointer dereference in routine that updates Tx ring stats. Currently only stats and bytes are updated when ring pointer is valid, but later on ring is accessed to propagate gathered Tx stats onto VSI stats. Change the existing logic to move to next ring when ring is NULL. Fixes: e72bba21355d ("ice: split ice_ring onto Tx/Rx separate structs") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Maciej Fijalkowski Acked-by: Alexander Lobakin Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 493942e910be..d4a7c39fd078 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5962,8 +5962,9 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - if (ring) - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + if (!ring) + continue; + ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; vsi->tx_restart += ring->tx_stats.restart_q; From 1b4ae7d925c6569fff27313b4d84171b11510893 Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Thu, 10 Mar 2022 10:46:52 -0800 Subject: [PATCH 16/26] ice: destroy flow director filter mutex after releasing VSIs Currently fdir_fltr_lock is accessed in ice_vsi_release_all() function after it is 
destroyed. Instead destroy mutex after ice_vsi_release_all. Fixes: 40319796b732 ("ice: Add flow director support for channel mode") Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d4a7c39fd078..b7e8744b0c0a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4880,7 +4880,6 @@ static void ice_remove(struct pci_dev *pdev) ice_devlink_unregister_params(pf); set_bit(ICE_DOWN, pf->state); - mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_deinit_lag(pf); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) ice_ptp_release(pf); @@ -4888,6 +4887,7 @@ static void ice_remove(struct pci_dev *pdev) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); ice_vsi_release_all(pf); + mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_set_wake(pf); ice_free_irq_msix_misc(pf); ice_for_each_vsi(pf, i) { From 16b2dd8cdf6f4e0597c34899de74b4d012b78188 Mon Sep 17 00:00:00 2001 From: Przemyslaw Patynowski Date: Wed, 9 Mar 2022 16:37:39 +0100 Subject: [PATCH 17/26] iavf: Fix double free in iavf_reset_task Fix double free possibility in iavf_disable_vf, as crit_lock is freed in caller, iavf_reset_task. Add kernel-doc for iavf_disable_vf. Remove mutex_unlock in iavf_disable_vf. Without this patch there is double free scenario, when calling iavf_reset_task. 
Fixes: e85ff9c631e1 ("iavf: Fix deadlock in iavf_reset_task") Signed-off-by: Przemyslaw Patynowski Suggested-by: Dan Carpenter Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8e644e9ed8da..45570e3f782e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2541,6 +2541,13 @@ restart_watchdog: queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); } +/** + * iavf_disable_vf - disable VF + * @adapter: board private structure + * + * Set communication failed flag and free all resources. + * NOTE: This function is expected to be called with crit_lock being held. + **/ static void iavf_disable_vf(struct iavf_adapter *adapter) { struct iavf_mac_filter *f, *ftmp; @@ -2595,7 +2602,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; - mutex_unlock(&adapter->crit_lock); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); From 886e44c9298a6b428ae046e2fa092ca52e822e6a Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Mon, 14 Mar 2022 10:01:25 +0800 Subject: [PATCH 18/26] hv_netvsc: Add check for kvmalloc_array Because kvmalloc_array() can fail, it is better to check its result and restore the 'data' on failure in order to avoid dereferencing a NULL pointer.
Fixes: 6ae746711263 ("hv_netvsc: Add per-cpu ethtool stats for netvsc") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20220314020125.2365084-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3646469433b1..fde1c492ca02 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1587,6 +1587,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, pcpu_sum = kvmalloc_array(num_possible_cpus(), sizeof(struct netvsc_ethtool_pcpu_stats), GFP_KERNEL); + if (!pcpu_sum) + return; + netvsc_get_pcpu_stats(dev, pcpu_sum); for_each_present_cpu(cpu) { struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu]; From 45b4eb7ee6aa1a55a50831b328aa5f46ac3a7187 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 15 Mar 2022 17:54:55 +0200 Subject: [PATCH 19/26] Revert "ath10k: drop beacon and probe response which leak from other channel" This reverts commit 3bf2537ec2e33310b431b53fd84be8833736c256. It was reported to me privately that this commit breaks AP and mesh mode on QCA9984 (firmware 10.4-3.9.0.2-00156). So revert the commit to fix the regression. There was a conflict due to cfg80211 API changes but that was easy to fix.
Fixes: 3bf2537ec2e3 ("ath10k: drop beacon and probe response which leak from other channel") Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220315155455.20446-1-kvalo@kernel.org --- drivers/net/wireless/ath/ath10k/wmi.c | 33 +-------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 62c453a21e49..7c1c2658cb5f 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2611,36 +2611,9 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_mac_handle_beacon(ar, skb); if (ieee80211_is_beacon(hdr->frame_control) || - ieee80211_is_probe_resp(hdr->frame_control)) { - struct ieee80211_mgmt *mgmt = (void *)skb->data; - enum cfg80211_bss_frame_type ftype; - u8 *ies; - int ies_ch; - + ieee80211_is_probe_resp(hdr->frame_control)) status->boottime_ns = ktime_get_boottime_ns(); - if (!ar->scan_channel) - goto drop; - - ies = mgmt->u.beacon.variable; - - if (ieee80211_is_beacon(mgmt->frame_control)) - ftype = CFG80211_BSS_FTYPE_BEACON; - else - ftype = CFG80211_BSS_FTYPE_PRESP; - - ies_ch = cfg80211_get_ies_channel_number(mgmt->u.beacon.variable, - skb_tail_pointer(skb) - ies, - sband->band, ftype); - - if (ies_ch > 0 && ies_ch != channel) { - ath10k_dbg(ar, ATH10K_DBG_MGMT, - "channel mismatched ds channel %d scan channel %d\n", - ies_ch, channel); - goto drop; - } - } - ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, @@ -2654,10 +2627,6 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ieee80211_rx_ni(ar->hw, skb); return 0; - -drop: - dev_kfree_skb(skb); - return 0; } static int freq_to_idx(struct ath10k *ar, int freq) From 4ee06de7729d795773145692e246a06448b1eb7a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 15 Mar 2022 10:20:08 +0100 Subject: [PATCH 20/26] net: handle ARPHRD_PIMREG in 
dev_is_mac_header_xmit() This kind of interface doesn't have a mac header. This patch fixes bpf_redirect() to a PIM interface. Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220315092008.31423-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b712217f7030..1ed52441972f 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -52,6 +52,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; From cb0b430b4e3acc88c85e0ad2e25f2a25a5765262 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 16 Mar 2022 08:26:02 +0000 Subject: [PATCH 21/26] net: dsa: Add missing of_node_put() in dsa_port_parse_of The device_node pointer is returned by of_parse_phandle() with refcount incremented. We should use of_node_put() on it when done. Fixes: 6d4e5c570c2d ("net: dsa: get port type at parse time") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20220316082602.10785-1-linmq006@gmail.com Signed-off-by: Paolo Abeni --- net/dsa/dsa2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 074e4a69a728..88e2808019b4 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1436,6 +1436,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) const char *user_protocol; master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); if (!master) return -EPROBE_DEFER; From f1858c277ba40172005b76a31e6bb931bfc19d9c Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Wed, 16 Mar 2022 16:18:35 +0100 Subject: [PATCH 22/26] net: phy: mscc: Add MODULE_FIRMWARE macros The driver requires firmware so define MODULE_FIRMWARE so that modinfo provides the details. 
Fixes: fa164e40c53b ("net: phy: mscc: split the driver into separate files") Signed-off-by: Juerg Haefliger Link: https://lore.kernel.org/r/20220316151835.88765-1-juergh@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mscc/mscc_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index ebfeeb3c67c1..7e3017e7a1c0 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -2685,3 +2685,6 @@ MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl); MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver"); MODULE_AUTHOR("Nagaraju Lakkaraju"); MODULE_LICENSE("Dual MIT/GPL"); + +MODULE_FIRMWARE(MSCC_VSC8584_REVB_INT8051_FW); +MODULE_FIRMWARE(MSCC_VSC8574_REVB_INT8051_FW); From 424e7834e293936a54fcf05173f2884171adc5a3 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Wed, 16 Mar 2022 14:46:13 -0700 Subject: [PATCH 23/26] bnx2x: fix built-in kernel driver load failure Commit b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0") added request_firmware() logic in probe() which caused load failure when firmware file is not present in initrd (below), as access to firmware file is not feasible during probe. Direct firmware load for bnx2x/bnx2x-e2-7.13.15.0.fw failed with error -2 Direct firmware load for bnx2x/bnx2x-e2-7.13.21.0.fw failed with error -2 This patch fixes this issue by - 1. Removing request_firmware() logic from the probe() such that .ndo_open() handle it as it used to handle it earlier 2. 
Given request_firmware() is removed from probe(), so driver has to relax FW version comparisons a bit against the already loaded FW version (by some other PFs of same adapter) to allow different compatible/close enough FWs with which multiple PFs may run with (in different environments), as the given PF who is in probe flow has no idea now with which firmware file version it is going to initialize the device in ndo_open() Link: https://lore.kernel.org/all/46f2d9d9-ae7f-b332-ddeb-b59802be2bab@molgen.mpg.de/ Reported-by: Paul Menzel Tested-by: Paul Menzel Fixes: b7a49f73059f ("bnx2x: Utilize firmware 7.13.21.0") Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Link: https://lore.kernel.org/r/20220316214613.6884-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 -- .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 28 +++++++++++-------- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 15 ++-------- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index a19dd6797070..2209d99b3404 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2533,6 +2533,4 @@ void bnx2x_register_phc(struct bnx2x *bp); * Meant for implicit re-load flows. */ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp); -int bnx2x_init_firmware(struct bnx2x *bp); -void bnx2x_release_firmware(struct bnx2x *bp); #endif /* bnx2x.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 8d36ebbf08e1..5729a5ab059d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2364,24 +2364,30 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err) /* is another pf loaded on this engine? 
*/ if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP && load_code != FW_MSG_CODE_DRV_LOAD_COMMON) { - /* build my FW version dword */ - u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) + - (bp->fw_rev << 16) + (bp->fw_eng << 24); + u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng; + u32 loaded_fw; /* read loaded FW from chip */ - u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); + loaded_fw = REG_RD(bp, XSEM_REG_PRAM); - DP(BNX2X_MSG_SP, "loaded fw %x, my fw %x\n", - loaded_fw, my_fw); + loaded_fw_major = loaded_fw & 0xff; + loaded_fw_minor = (loaded_fw >> 8) & 0xff; + loaded_fw_rev = (loaded_fw >> 16) & 0xff; + loaded_fw_eng = (loaded_fw >> 24) & 0xff; + + DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n", + loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng); /* abort nic load if version mismatch */ - if (my_fw != loaded_fw) { + if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION || + loaded_fw_minor != BCM_5710_FW_MINOR_VERSION || + loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION || + loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) { if (print_err) - BNX2X_ERR("bnx2x with FW %x was already loaded which mismatches my %x FW. Aborting\n", - loaded_fw, my_fw); + BNX2X_ERR("loaded FW incompatible. 
Aborting\n"); else - BNX2X_DEV_INFO("bnx2x with FW %x was already loaded which mismatches my %x FW, possibly due to MF UNDI\n", - loaded_fw, my_fw); + BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n"); + return -EBUSY; } } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index eedb48d945ed..c19b072f3a23 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12319,15 +12319,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) bnx2x_read_fwinfo(bp); - if (IS_PF(bp)) { - rc = bnx2x_init_firmware(bp); - - if (rc) { - bnx2x_free_mem_bp(bp); - return rc; - } - } - func = BP_FUNC(bp); /* need to reset chip if undi was active */ @@ -12340,7 +12331,6 @@ static int bnx2x_init_bp(struct bnx2x *bp) rc = bnx2x_prev_unload(bp); if (rc) { - bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); return rc; } @@ -13409,7 +13399,7 @@ do { \ (u8 *)bp->arr, len); \ } while (0) -int bnx2x_init_firmware(struct bnx2x *bp) +static int bnx2x_init_firmware(struct bnx2x *bp) { const char *fw_file_name, *fw_file_name_v15; struct bnx2x_fw_file_hdr *fw_hdr; @@ -13509,7 +13499,7 @@ request_firmware_exit: return rc; } -void bnx2x_release_firmware(struct bnx2x *bp) +static void bnx2x_release_firmware(struct bnx2x *bp) { kfree(bp->init_ops_offsets); kfree(bp->init_ops); @@ -14026,7 +14016,6 @@ static int bnx2x_init_one(struct pci_dev *pdev, return 0; init_one_freemem: - bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); init_one_exit: From 0f643c88c8d240eba0ea25c2e095a46515ff46e9 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 16 Mar 2022 18:28:12 -0700 Subject: [PATCH 24/26] net: bcmgenet: skip invalid partial checksums The RXCHK block will return a partial checksum of 0 if it encounters a problem while receiving a packet. 
Since a 1's complement sum can only produce this result if no bits are set in the received data stream it is fair to treat it as an invalid partial checksum and not pass it up the stack. Fixes: 810155397890 ("net: bcmgenet: use CHECKSUM_COMPLETE for NETIF_F_RXCSUM") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20220317012812.1313196-1-opendmb@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 87f1056e29ff..2da804f84b48 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2287,8 +2287,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, dma_length_status = status->length_status; if (dev->features & NETIF_F_RXCSUM) { rx_csum = (__force __be16)(status->rx_csum & 0xffff); - skb->csum = (__force __wsum)ntohs(rx_csum); - skb->ip_summed = CHECKSUM_COMPLETE; + if (rx_csum) { + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } } /* DMA flags and length are still valid no matter how From 8e0341aefcc9133f3f48683873284b169581315b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Mar 2022 21:21:17 +0200 Subject: [PATCH 25/26] net: mscc: ocelot: fix backwards compatibility with single-chain tc-flower offload ACL rules can be offloaded to VCAP IS2 either through chain 0, or, since the blamed commit, through a chain index whose number encodes a specific PAG (Policy Action Group) and lookup number. The chain number is translated through ocelot_chain_to_pag() into a PAG, and through ocelot_chain_to_lookup() into a lookup number. The problem with the blamed commit is that the above 2 functions don't have special treatment for chain 0. 
So ocelot_chain_to_pag(0) returns filter->pag = 224, which is in fact -32, but the "pag" field is an u8. So we end up programming the hardware with VCAP IS2 entries having a PAG of 224. But the way in which the PAG works is that it defines a subset of VCAP IS2 filters which should match on a packet. The default PAG is 0, and previous VCAP IS1 rules (which we offload using 'goto') can modify it. So basically, we are installing filters with a PAG on which no packet will ever match. This is the hardware equivalent of adding filters to a chain which has no 'goto' to it. Restore the previous functionality by making ACL filters offloaded to chain 0 go to PAG 0 and lookup number 0. The choice of PAG is clearly correct, but the choice of lookup number isn't "as before" (which was to leave the lookup a "don't care"). However, lookup 0 should be fine, since even though there are ACL actions (policers) which have a requirement to be used in a specific lookup, that lookup is 0. Fixes: 226e9cd82a96 ("net: mscc: ocelot: only install TCAM entries into a specific lookup and PAG") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220316192117.2568261-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_flower.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 949858891973..fdb4d7e7296c 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -60,6 +60,12 @@ static int ocelot_chain_to_block(int chain, bool ingress) */ static int ocelot_chain_to_lookup(int chain) { + /* Backwards compatibility with older, single-chain tc-flower + * offload support in Ocelot + */ + if (chain == 0) + return 0; + return (chain / VCAP_LOOKUP) % 10; } @@ -68,7 +74,15 @@ static int ocelot_chain_to_lookup(int chain) */ static int ocelot_chain_to_pag(int chain) { - int lookup = 
ocelot_chain_to_lookup(chain); + int lookup; + + /* Backwards compatibility with older, single-chain tc-flower + * offload support in Ocelot + */ + if (chain == 0) + return 0; + + lookup = ocelot_chain_to_lookup(chain); /* calculate PAG value as chain index relative to the first PAG */ return chain - VCAP_IS2_CHAIN(lookup, 0); From b04683ff8f0823b869c219c78ba0d974bddea0b5 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 17 Mar 2022 11:45:24 +0100 Subject: [PATCH 26/26] iavf: Fix hang during reboot/shutdown Recent commit 974578017fc1 ("iavf: Add waiting so the port is initialized in remove") adds a wait-loop at the beginning of iavf_remove() to ensure that port initialization is finished prior to unregistering the net device. This causes a regression in the reboot/shutdown scenario because in this case the iavf_shutdown() callback is called, and this callback detaches the device, brings it down if it is running and sets its state to __IAVF_REMOVE. Later the shutdown callback of the associated PF driver (e.g. ice_shutdown) is called. Among other things, that callback calls sriov_disable(), which indirectly calls iavf_remove() (see stack trace below). As the adapter state is already __IAVF_REMOVE, the mentioned loop is endless and the shutdown process hangs. The patch fixes this by checking the adapter's state at the beginning of iavf_remove() and skipping the rest of the function if the adapter is already in the remove state (shutdown is in progress). Reproducer: 1. Create VF on PF driven by ice or i40e driver 2. Ensure that the VF is bound to iavf driver 3.
Reboot [52625.981294] sysrq: SysRq : Show Blocked State [52625.988377] task:reboot state:D stack: 0 pid:17359 ppid: 1 f2 [52625.996732] Call Trace: [52625.999187] __schedule+0x2d1/0x830 [52626.007400] schedule+0x35/0xa0 [52626.010545] schedule_hrtimeout_range_clock+0x83/0x100 [52626.020046] usleep_range+0x5b/0x80 [52626.023540] iavf_remove+0x63/0x5b0 [iavf] [52626.027645] pci_device_remove+0x3b/0xc0 [52626.031572] device_release_driver_internal+0x103/0x1f0 [52626.036805] pci_stop_bus_device+0x72/0xa0 [52626.040904] pci_stop_and_remove_bus_device+0xe/0x20 [52626.045870] pci_iov_remove_virtfn+0xba/0x120 [52626.050232] sriov_disable+0x2f/0xe0 [52626.053813] ice_free_vfs+0x7c/0x340 [ice] [52626.057946] ice_remove+0x220/0x240 [ice] [52626.061967] ice_shutdown+0x16/0x50 [ice] [52626.065987] pci_device_shutdown+0x34/0x60 [52626.070086] device_shutdown+0x165/0x1c5 [52626.074011] kernel_restart+0xe/0x30 [52626.077593] __do_sys_reboot+0x1d2/0x210 [52626.093815] do_syscall_64+0x5b/0x1a0 [52626.097483] entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: 974578017fc1 ("iavf: Add waiting so the port is initialized in remove") Signed-off-by: Ivan Vecera Link: https://lore.kernel.org/r/20220317104524.2802848-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 45570e3f782e..0e178a0a59c5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4620,6 +4620,13 @@ static void iavf_remove(struct pci_dev *pdev) struct iavf_hw *hw = &adapter->hw; int err; + /* When reboot/shutdown is in progress no need to do anything + * as the adapter is already REMOVE state that was set during + * iavf_shutdown() callback. 
+ */ + if (adapter->state == __IAVF_REMOVE) + return; + set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); /* Wait until port initialization is complete. * There are flows where register/unregister netdev may race.