OpenCloudOS-Kernel/net/core/link_watch.c

295 lines
6.6 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux network device link state notification
*
* Author:
* Stefan Rompf <sux@loplof.de>
*/
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include "dev.h"
enum lw_bits {
LW_URGENT = 0,
};
static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;
2006-11-22 22:55:48 +08:00
static void linkwatch_event(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);
static unsigned char default_operstate(const struct net_device *dev)
{
if (netif_testing(dev))
return IF_OPER_TESTING;
net: linkwatch: only report IF_OPER_LOWERLAYERDOWN if iflink is actually down RFC 2863 says: The lowerLayerDown state is also a refinement on the down state. This new state indicates that this interface runs "on top of" one or more other interfaces (see ifStackTable) and that this interface is down specifically because one or more of these lower-layer interfaces are down. DSA interfaces are virtual network devices, stacked on top of the DSA master, but they have a physical MAC, with a PHY that reports a real link status. But since DSA (perhaps improperly) uses an iflink to describe the relationship to its master since commit c084080151e1 ("dsa: set ->iflink on slave interfaces to the ifindex of the parent"), default_operstate() will misinterpret this to mean that every time the carrier of a DSA interface is not ok, it is because of the master being not ok. In fact, since commit c0a8a9c27493 ("net: dsa: automatically bring user ports down when master goes down"), DSA cannot even in theory be in the lowerLayerDown state, because it just calls dev_close_many(), thereby going down, when the master goes down. We could revert the commit that creates an iflink between a DSA user port and its master, especially since now we have an alternative IFLA_DSA_MASTER which has less side effects. But there may be tooling in use which relies on the iflink, which has existed since 2009. We could also probably do something local within DSA to overwrite what rfc2863_policy() did, in a way similar to hsr_set_operstate(), but this seems like a hack. What seems appropriate is to follow the iflink, and check the carrier status of that interface as well. If that's down too, yes, keep reporting lowerLayerDown, otherwise just down. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-11-14 22:42:56 +08:00
/* Some uppers (DSA) have additional sources for being down, so
* first check whether lower is indeed the source of its down state.
*/
if (!netif_carrier_ok(dev)) {
int iflink = dev_get_iflink(dev);
struct net_device *peer;
if (iflink == dev->ifindex)
return IF_OPER_DOWN;
peer = __dev_get_by_index(dev_net(dev), iflink);
if (!peer)
return IF_OPER_DOWN;
return netif_carrier_ok(peer) ? IF_OPER_DOWN :
IF_OPER_LOWERLAYERDOWN;
}
if (netif_dormant(dev))
return IF_OPER_DORMANT;
return IF_OPER_UP;
}
static void rfc2863_policy(struct net_device *dev)
{
unsigned char operstate = default_operstate(dev);
if (operstate == READ_ONCE(dev->operstate))
return;
write_lock(&dev_base_lock);
switch(dev->link_mode) {
case IF_LINK_MODE_TESTING:
if (operstate == IF_OPER_UP)
operstate = IF_OPER_TESTING;
break;
case IF_LINK_MODE_DORMANT:
if (operstate == IF_OPER_UP)
operstate = IF_OPER_DORMANT;
break;
case IF_LINK_MODE_DEFAULT:
default:
break;
}
WRITE_ONCE(dev->operstate, operstate);
write_unlock(&dev_base_lock);
}
net: Set device operstate at registration time The operstate of a device is initially IF_OPER_UNKNOWN and is updated asynchronously by linkwatch after each change of carrier state reported by the driver. The default carrier state of a net device is on, and this will never be changed on drivers that do not support carrier detection, thus the operstate remains IF_OPER_UNKNOWN. For devices that do support carrier detection, the driver must set the carrier state to off initially, then poll the hardware state when the device is opened. However, we must not activate linkwatch for a unregistered device, and commit b473001 ('net: Do not fire linkwatch events until the device is registered.') ensured that we don't. But this means that the operstate for many devices that support carrier detection remains IF_OPER_UNKNOWN when it should be IF_OPER_DOWN. The same issue exists with the dormant state. The proper initialisation sequence, avoiding a race with opening of the device, is: rtnl_lock(); rc = register_netdevice(dev); if (rc) goto out_unlock; netif_carrier_off(dev); /* or netif_dormant_on(dev) */ rtnl_unlock(); but it seems silly that this should have to be repeated in so many drivers. Further, the operstate seen immediately after opening the device may still be IF_OPER_UNKNOWN due to the asynchronous nature of linkwatch. Commit 22604c8 ('net: Fix for initial link state in 2.6.28') attempted to fix this by setting the operstate synchronously, but it was reverted as it could lead to deadlock. This initialises the operstate synchronously at registration time only. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-08-21 05:16:51 +08:00
void linkwatch_init_dev(struct net_device *dev)
{
/* Handle pre-registration link state changes */
if (!netif_carrier_ok(dev) || netif_dormant(dev) ||
netif_testing(dev))
net: Set device operstate at registration time The operstate of a device is initially IF_OPER_UNKNOWN and is updated asynchronously by linkwatch after each change of carrier state reported by the driver. The default carrier state of a net device is on, and this will never be changed on drivers that do not support carrier detection, thus the operstate remains IF_OPER_UNKNOWN. For devices that do support carrier detection, the driver must set the carrier state to off initially, then poll the hardware state when the device is opened. However, we must not activate linkwatch for a unregistered device, and commit b473001 ('net: Do not fire linkwatch events until the device is registered.') ensured that we don't. But this means that the operstate for many devices that support carrier detection remains IF_OPER_UNKNOWN when it should be IF_OPER_DOWN. The same issue exists with the dormant state. The proper initialisation sequence, avoiding a race with opening of the device, is: rtnl_lock(); rc = register_netdevice(dev); if (rc) goto out_unlock; netif_carrier_off(dev); /* or netif_dormant_on(dev) */ rtnl_unlock(); but it seems silly that this should have to be repeated in so many drivers. Further, the operstate seen immediately after opening the device may still be IF_OPER_UNKNOWN due to the asynchronous nature of linkwatch. Commit 22604c8 ('net: Fix for initial link state in 2.6.28') attempted to fix this by setting the operstate synchronously, but it was reverted as it could lead to deadlock. This initialises the operstate synchronously at registration time only. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2012-08-21 05:16:51 +08:00
rfc2863_policy(dev);
}
static bool linkwatch_urgent_event(struct net_device *dev)
{
if (!netif_running(dev))
return false;
if (dev->ifindex != dev_get_iflink(dev))
return true;
if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
return true;
return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}
static void linkwatch_add_event(struct net_device *dev)
{
unsigned long flags;
spin_lock_irqsave(&lweventlist_lock, flags);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
if (list_empty(&dev->link_watch_list)) {
list_add_tail(&dev->link_watch_list, &lweventlist);
netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
}
spin_unlock_irqrestore(&lweventlist_lock, flags);
}
static void linkwatch_schedule_work(int urgent)
{
unsigned long delay = linkwatch_nextevent - jiffies;
if (test_bit(LW_URGENT, &linkwatch_flags))
return;
/* Minimise down-time: drop delay for up event. */
if (urgent) {
if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
return;
delay = 0;
}
/* If we wrap around we'll delay it by at most HZ. */
if (delay > HZ)
delay = 0;
/*
* If urgent, schedule immediate execution; otherwise, don't
* override the existing timer.
*/
if (test_bit(LW_URGENT, &linkwatch_flags))
mod_delayed_work(system_unbound_wq, &linkwatch_work, 0);
else
queue_delayed_work(system_unbound_wq, &linkwatch_work, delay);
}
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
static void linkwatch_do_dev(struct net_device *dev)
{
/*
* Make sure the above read is complete since it can be
* rewritten as soon as we clear the bit below.
*/
smp_mb__before_atomic();
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
/* We are about to handle this device,
* so new events can be accepted
*/
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
rfc2863_policy(dev);
net: linkwatch: fix failure to restore device state across suspend/resume After migrating my laptop from 4.19-LTS to 5.4-LTS a while ago I noticed that my Ethernet port to which a bond and a VLAN interface are attached appeared to remain up after resuming from suspend with the cable unplugged (and that problem still persists with 5.10-LTS). It happens that the following happens: - the network driver (e1000e here) prepares to suspend, calls e1000e_down() which calls netif_carrier_off() to signal that the link is going down. - netif_carrier_off() adds a link_watch event to the list of events for this device - the device is completely stopped. - the machine suspends - the cable is unplugged and the machine brought to another location - the machine is resumed - the queued linkwatch events are processed for the device - the device doesn't yet have the __LINK_STATE_PRESENT bit and its events are silently dropped - the device is resumed with its link down - the upper VLAN and bond interfaces are never notified that the link had been turned down and remain up - the only way to provoke a change is to physically connect the machine to a port and possibly unplug it. The state after resume looks like this: $ ip -br li | egrep 'bond|eth' bond0 UP e8:6a:64:64:64:64 <BROADCAST,MULTICAST,MASTER,UP,LOWER_UP> eth0 DOWN e8:6a:64:64:64:64 <NO-CARRIER,BROADCAST,MULTICAST,SLAVE,UP> eth0.2@eth0 UP e8:6a:64:64:64:64 <BROADCAST,MULTICAST,SLAVE,UP,LOWER_UP> Placing an explicit call to netdev_state_change() either in the suspend or the resume code in the NIC driver worked around this but the solution is not satisfying. The issue in fact really is in link_watch that loses events while it ought not to. It happens that the test for the device being present was added by commit 124eee3f6955 ("net: linkwatch: add check for netdevice being present to linkwatch_do_dev") in 4.20 to avoid an access to devices that are not present. Instead of dropping events, this patch proceeds slightly differently by postponing their handling so that they happen after the device is fully resumed. Fixes: 124eee3f6955 ("net: linkwatch: add check for netdevice being present to linkwatch_do_dev") Link: https://lists.openwall.net/netdev/2018/03/15/62 Cc: Heiner Kallweit <hkallweit1@gmail.com> Cc: Geert Uytterhoeven <geert+renesas@glider.be> Cc: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Willy Tarreau <w@1wt.eu> Link: https://lore.kernel.org/r/20210809160628.22623-1-w@1wt.eu Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-10 00:06:28 +08:00
if (dev->flags & IFF_UP) {
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
if (netif_carrier_ok(dev))
dev_activate(dev);
else
dev_deactivate(dev);
netdev_state_change(dev);
}
/* Note: our callers are responsible for calling netdev_tracker_free().
* This is the reason we use __dev_put() instead of dev_put().
net: linkwatch: be more careful about dev->linkwatch_dev_tracker Apparently a concurrent linkwatch_add_event() could run while we are in __linkwatch_run_queue(). We need to free dev->linkwatch_dev_tracker tracker under lweventlist_lock protection to avoid this race. syzbot report: [ 77.935949][ T3661] reference already released. [ 77.941015][ T3661] allocated in: [ 77.944482][ T3661] linkwatch_fire_event+0x202/0x260 [ 77.950318][ T3661] netif_carrier_on+0x9c/0x100 [ 77.955120][ T3661] __ieee80211_sta_join_ibss+0xc52/0x1590 [ 77.960888][ T3661] ieee80211_sta_create_ibss.cold+0xd2/0x11f [ 77.966908][ T3661] ieee80211_ibss_work.cold+0x30e/0x60f [ 77.972483][ T3661] ieee80211_iface_work+0xb70/0xd00 [ 77.977715][ T3661] process_one_work+0x9ac/0x1680 [ 77.982671][ T3661] worker_thread+0x652/0x11c0 [ 77.987371][ T3661] kthread+0x405/0x4f0 [ 77.991465][ T3661] ret_from_fork+0x1f/0x30 [ 77.995895][ T3661] freed in: [ 77.999006][ T3661] linkwatch_do_dev+0x96/0x160 [ 78.004014][ T3661] __linkwatch_run_queue+0x233/0x6a0 [ 78.009496][ T3661] linkwatch_event+0x4a/0x60 [ 78.014099][ T3661] process_one_work+0x9ac/0x1680 [ 78.019034][ T3661] worker_thread+0x652/0x11c0 [ 78.023719][ T3661] kthread+0x405/0x4f0 [ 78.027810][ T3661] ret_from_fork+0x1f/0x30 [ 78.042541][ T3661] ------------[ cut here ]------------ [ 78.048253][ T3661] WARNING: CPU: 0 PID: 3661 at lib/ref_tracker.c:120 ref_tracker_free.cold+0x110/0x14e [ 78.062364][ T3661] Modules linked in: [ 78.066424][ T3661] CPU: 0 PID: 3661 Comm: kworker/0:5 Not tainted 5.16.0-rc4-next-20211210-syzkaller #0 [ 78.076075][ T3661] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 78.090648][ T3661] Workqueue: events linkwatch_event [ 78.095890][ T3661] RIP: 0010:ref_tracker_free.cold+0x110/0x14e [ 78.102191][ T3661] Code: ea 03 48 c1 e0 2a 0f b6 04 02 84 c0 74 04 3c 03 7e 4c 8b 7b 18 e8 6b 54 e9 fa e8 26 4d 57 f8 4c 89 ee 48 89 ef e8 fb 33 36 00 <0f> 0b 41 bd ea ff ff ff e9 bd 60 e9 fa 4c 89 f7 e8 16 45 a2 f8 e9 [ 78.127211][ T3661] RSP: 0018:ffffc90002b5fb18 EFLAGS: 00010246 [ 78.133684][ T3661] RAX: 0000000000000000 RBX: ffff88807467f700 RCX: 0000000000000000 [ 78.141928][ T3661] RDX: 0000000000000001 RSI: 0000000000000001 RDI: 0000000000000001 [ 78.150087][ T3661] RBP: ffff888057e105b8 R08: 0000000000000001 R09: ffffffff8ffa1967 [ 78.158211][ T3661] R10: 0000000000000001 R11: 0000000000000000 R12: 1ffff9200056bf65 [ 78.166204][ T3661] R13: 0000000000000292 R14: ffff88807467f718 R15: 00000000c0e0008c [ 78.174321][ T3661] FS: 0000000000000000(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 [ 78.183310][ T3661] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 78.190156][ T3661] CR2: 000000c000208800 CR3: 000000007f7b5000 CR4: 00000000003506f0 [ 78.198235][ T3661] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 78.206214][ T3661] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 78.214328][ T3661] Call Trace: [ 78.217679][ T3661] <TASK> [ 78.220621][ T3661] ? __sanitizer_cov_trace_const_cmp4+0x1c/0x70 [ 78.226981][ T3661] ? nlmsg_notify+0xbe/0x280 [ 78.231607][ T3661] ? ref_tracker_dir_exit+0x330/0x330 [ 78.237654][ T3661] ? linkwatch_do_dev+0x96/0x160 [ 78.242628][ T3661] ? __linkwatch_run_queue+0x233/0x6a0 [ 78.248170][ T3661] ? linkwatch_event+0x4a/0x60 [ 78.252946][ T3661] ? process_one_work+0x9ac/0x1680 [ 78.258136][ T3661] ? worker_thread+0x853/0x11c0 [ 78.263020][ T3661] ? kthread+0x405/0x4f0 [ 78.267905][ T3661] ? ret_from_fork+0x1f/0x30 [ 78.272670][ T3661] ? netdev_state_change+0xa1/0x130 [ 78.278019][ T3661] ? netdev_exit+0xd0/0xd0 [ 78.282466][ T3661] ? dev_activate+0x420/0xa60 [ 78.287261][ T3661] linkwatch_do_dev+0x96/0x160 [ 78.292043][ T3661] __linkwatch_run_queue+0x233/0x6a0 [ 78.297505][ T3661] ? linkwatch_do_dev+0x160/0x160 [ 78.302561][ T3661] linkwatch_event+0x4a/0x60 [ 78.307225][ T3661] process_one_work+0x9ac/0x1680 [ 78.312292][ T3661] ? pwq_dec_nr_in_flight+0x2a0/0x2a0 [ 78.317757][ T3661] ? rwlock_bug.part.0+0x90/0x90 [ 78.322726][ T3661] ? _raw_spin_lock_irq+0x41/0x50 [ 78.327844][ T3661] worker_thread+0x853/0x11c0 [ 78.332543][ T3661] ? process_one_work+0x1680/0x1680 [ 78.338500][ T3661] kthread+0x405/0x4f0 [ 78.342610][ T3661] ? set_kthread_struct+0x130/0x130 Fixes: 63f13937cbe9 ("net: linkwatch: add net device refcount tracker") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Link: https://lore.kernel.org/r/20211214051955.3569843-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-14 13:19:55 +08:00
*/
__dev_put(dev);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
}
static void __linkwatch_run_queue(int urgent_only)
{
#define MAX_DO_DEV_PER_LOOP 100
int do_dev = MAX_DO_DEV_PER_LOOP;
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
struct net_device *dev;
LIST_HEAD(wrk);
/* Give urgent case more budget */
if (urgent_only)
do_dev += MAX_DO_DEV_PER_LOOP;
/*
* Limit the number of linkwatch events to one
* per second so that a runaway driver does not
* cause a storm of messages on the netlink
* socket. This limit does not apply to up events
* while the device qdisc is down.
*/
if (!urgent_only)
linkwatch_nextevent = jiffies + HZ;
/* Limit wrap-around effect on delay. */
else if (time_after(linkwatch_nextevent, jiffies + HZ))
linkwatch_nextevent = jiffies;
clear_bit(LW_URGENT, &linkwatch_flags);
spin_lock_irq(&lweventlist_lock);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
list_splice_init(&lweventlist, &wrk);
while (!list_empty(&wrk) && do_dev > 0) {
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
net: linkwatch: fix failure to restore device state across suspend/resume After migrating my laptop from 4.19-LTS to 5.4-LTS a while ago I noticed that my Ethernet port to which a bond and a VLAN interface are attached appeared to remain up after resuming from suspend with the cable unplugged (and that problem still persists with 5.10-LTS). It happens that the following happens: - the network driver (e1000e here) prepares to suspend, calls e1000e_down() which calls netif_carrier_off() to signal that the link is going down. - netif_carrier_off() adds a link_watch event to the list of events for this device - the device is completely stopped. - the machine suspends - the cable is unplugged and the machine brought to another location - the machine is resumed - the queued linkwatch events are processed for the device - the device doesn't yet have the __LINK_STATE_PRESENT bit and its events are silently dropped - the device is resumed with its link down - the upper VLAN and bond interfaces are never notified that the link had been turned down and remain up - the only way to provoke a change is to physically connect the machine to a port and possibly unplug it. The state after resume looks like this: $ ip -br li | egrep 'bond|eth' bond0 UP e8:6a:64:64:64:64 <BROADCAST,MULTICAST,MASTER,UP,LOWER_UP> eth0 DOWN e8:6a:64:64:64:64 <NO-CARRIER,BROADCAST,MULTICAST,SLAVE,UP> eth0.2@eth0 UP e8:6a:64:64:64:64 <BROADCAST,MULTICAST,SLAVE,UP,LOWER_UP> Placing an explicit call to netdev_state_change() either in the suspend or the resume code in the NIC driver worked around this but the solution is not satisfying. The issue in fact really is in link_watch that loses events while it ought not to. It happens that the test for the device being present was added by commit 124eee3f6955 ("net: linkwatch: add check for netdevice being present to linkwatch_do_dev") in 4.20 to avoid an access to devices that are not present. Instead of dropping events, this patch proceeds slightly differently by postponing their handling so that they happen after the device is fully resumed. Fixes: 124eee3f6955 ("net: linkwatch: add check for netdevice being present to linkwatch_do_dev") Link: https://lists.openwall.net/netdev/2018/03/15/62 Cc: Heiner Kallweit <hkallweit1@gmail.com> Cc: Geert Uytterhoeven <geert+renesas@glider.be> Cc: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Willy Tarreau <w@1wt.eu> Link: https://lore.kernel.org/r/20210809160628.22623-1-w@1wt.eu Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-10 00:06:28 +08:00
if (!netif_device_present(dev) ||
(urgent_only && !linkwatch_urgent_event(dev))) {
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
list_add_tail(&dev->link_watch_list, &lweventlist);
continue;
}
net: linkwatch: be more careful about dev->linkwatch_dev_tracker Apparently a concurrent linkwatch_add_event() could run while we are in __linkwatch_run_queue(). We need to free dev->linkwatch_dev_tracker tracker under lweventlist_lock protection to avoid this race. syzbot report: [ 77.935949][ T3661] reference already released. [ 77.941015][ T3661] allocated in: [ 77.944482][ T3661] linkwatch_fire_event+0x202/0x260 [ 77.950318][ T3661] netif_carrier_on+0x9c/0x100 [ 77.955120][ T3661] __ieee80211_sta_join_ibss+0xc52/0x1590 [ 77.960888][ T3661] ieee80211_sta_create_ibss.cold+0xd2/0x11f [ 77.966908][ T3661] ieee80211_ibss_work.cold+0x30e/0x60f [ 77.972483][ T3661] ieee80211_iface_work+0xb70/0xd00 [ 77.977715][ T3661] process_one_work+0x9ac/0x1680 [ 77.982671][ T3661] worker_thread+0x652/0x11c0 [ 77.987371][ T3661] kthread+0x405/0x4f0 [ 77.991465][ T3661] ret_from_fork+0x1f/0x30 [ 77.995895][ T3661] freed in: [ 77.999006][ T3661] linkwatch_do_dev+0x96/0x160 [ 78.004014][ T3661] __linkwatch_run_queue+0x233/0x6a0 [ 78.009496][ T3661] linkwatch_event+0x4a/0x60 [ 78.014099][ T3661] process_one_work+0x9ac/0x1680 [ 78.019034][ T3661] worker_thread+0x652/0x11c0 [ 78.023719][ T3661] kthread+0x405/0x4f0 [ 78.027810][ T3661] ret_from_fork+0x1f/0x30 [ 78.042541][ T3661] ------------[ cut here ]------------ [ 78.048253][ T3661] WARNING: CPU: 0 PID: 3661 at lib/ref_tracker.c:120 ref_tracker_free.cold+0x110/0x14e [ 78.062364][ T3661] Modules linked in: [ 78.066424][ T3661] CPU: 0 PID: 3661 Comm: kworker/0:5 Not tainted 5.16.0-rc4-next-20211210-syzkaller #0 [ 78.076075][ T3661] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 78.090648][ T3661] Workqueue: events linkwatch_event [ 78.095890][ T3661] RIP: 0010:ref_tracker_free.cold+0x110/0x14e [ 78.102191][ T3661] Code: ea 03 48 c1 e0 2a 0f b6 04 02 84 c0 74 04 3c 03 7e 4c 8b 7b 18 e8 6b 54 e9 fa e8 26 4d 57 f8 4c 89 ee 48 89 ef e8 fb 33 36 00 <0f> 0b 41 bd ea ff ff ff e9 bd 60 e9 fa 4c 89 f7 e8 16 45 a2 f8 e9 [ 78.127211][ T3661] RSP: 0018:ffffc90002b5fb18 EFLAGS: 00010246 [ 78.133684][ T3661] RAX: 0000000000000000 RBX: ffff88807467f700 RCX: 0000000000000000 [ 78.141928][ T3661] RDX: 0000000000000001 RSI: 0000000000000001 RDI: 0000000000000001 [ 78.150087][ T3661] RBP: ffff888057e105b8 R08: 0000000000000001 R09: ffffffff8ffa1967 [ 78.158211][ T3661] R10: 0000000000000001 R11: 0000000000000000 R12: 1ffff9200056bf65 [ 78.166204][ T3661] R13: 0000000000000292 R14: ffff88807467f718 R15: 00000000c0e0008c [ 78.174321][ T3661] FS: 0000000000000000(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 [ 78.183310][ T3661] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 78.190156][ T3661] CR2: 000000c000208800 CR3: 000000007f7b5000 CR4: 00000000003506f0 [ 78.198235][ T3661] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 78.206214][ T3661] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 78.214328][ T3661] Call Trace: [ 78.217679][ T3661] <TASK> [ 78.220621][ T3661] ? __sanitizer_cov_trace_const_cmp4+0x1c/0x70 [ 78.226981][ T3661] ? nlmsg_notify+0xbe/0x280 [ 78.231607][ T3661] ? ref_tracker_dir_exit+0x330/0x330 [ 78.237654][ T3661] ? linkwatch_do_dev+0x96/0x160 [ 78.242628][ T3661] ? __linkwatch_run_queue+0x233/0x6a0 [ 78.248170][ T3661] ? linkwatch_event+0x4a/0x60 [ 78.252946][ T3661] ? process_one_work+0x9ac/0x1680 [ 78.258136][ T3661] ? worker_thread+0x853/0x11c0 [ 78.263020][ T3661] ? kthread+0x405/0x4f0 [ 78.267905][ T3661] ? ret_from_fork+0x1f/0x30 [ 78.272670][ T3661] ? netdev_state_change+0xa1/0x130 [ 78.278019][ T3661] ? netdev_exit+0xd0/0xd0 [ 78.282466][ T3661] ? dev_activate+0x420/0xa60 [ 78.287261][ T3661] linkwatch_do_dev+0x96/0x160 [ 78.292043][ T3661] __linkwatch_run_queue+0x233/0x6a0 [ 78.297505][ T3661] ? linkwatch_do_dev+0x160/0x160 [ 78.302561][ T3661] linkwatch_event+0x4a/0x60 [ 78.307225][ T3661] process_one_work+0x9ac/0x1680 [ 78.312292][ T3661] ? pwq_dec_nr_in_flight+0x2a0/0x2a0 [ 78.317757][ T3661] ? rwlock_bug.part.0+0x90/0x90 [ 78.322726][ T3661] ? _raw_spin_lock_irq+0x41/0x50 [ 78.327844][ T3661] worker_thread+0x853/0x11c0 [ 78.332543][ T3661] ? process_one_work+0x1680/0x1680 [ 78.338500][ T3661] kthread+0x405/0x4f0 [ 78.342610][ T3661] ? set_kthread_struct+0x130/0x130 Fixes: 63f13937cbe9 ("net: linkwatch: add net device refcount tracker") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Link: https://lore.kernel.org/r/20211214051955.3569843-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-14 13:19:55 +08:00
/* We must free netdev tracker under
* the spinlock protection.
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
spin_unlock_irq(&lweventlist_lock);
linkwatch_do_dev(dev);
do_dev--;
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
spin_lock_irq(&lweventlist_lock);
}
/* Add the remaining work back to lweventlist */
list_splice_init(&wrk, &lweventlist);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
if (!list_empty(&lweventlist))
linkwatch_schedule_work(0);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
spin_unlock_irq(&lweventlist_lock);
}
void linkwatch_forget_dev(struct net_device *dev)
{
unsigned long flags;
int clean = 0;
spin_lock_irqsave(&lweventlist_lock, flags);
if (!list_empty(&dev->link_watch_list)) {
list_del_init(&dev->link_watch_list);
clean = 1;
net: linkwatch: be more careful about dev->linkwatch_dev_tracker Apparently a concurrent linkwatch_add_event() could run while we are in __linkwatch_run_queue(). We need to free dev->linkwatch_dev_tracker tracker under lweventlist_lock protection to avoid this race. syzbot report: [ 77.935949][ T3661] reference already released. [ 77.941015][ T3661] allocated in: [ 77.944482][ T3661] linkwatch_fire_event+0x202/0x260 [ 77.950318][ T3661] netif_carrier_on+0x9c/0x100 [ 77.955120][ T3661] __ieee80211_sta_join_ibss+0xc52/0x1590 [ 77.960888][ T3661] ieee80211_sta_create_ibss.cold+0xd2/0x11f [ 77.966908][ T3661] ieee80211_ibss_work.cold+0x30e/0x60f [ 77.972483][ T3661] ieee80211_iface_work+0xb70/0xd00 [ 77.977715][ T3661] process_one_work+0x9ac/0x1680 [ 77.982671][ T3661] worker_thread+0x652/0x11c0 [ 77.987371][ T3661] kthread+0x405/0x4f0 [ 77.991465][ T3661] ret_from_fork+0x1f/0x30 [ 77.995895][ T3661] freed in: [ 77.999006][ T3661] linkwatch_do_dev+0x96/0x160 [ 78.004014][ T3661] __linkwatch_run_queue+0x233/0x6a0 [ 78.009496][ T3661] linkwatch_event+0x4a/0x60 [ 78.014099][ T3661] process_one_work+0x9ac/0x1680 [ 78.019034][ T3661] worker_thread+0x652/0x11c0 [ 78.023719][ T3661] kthread+0x405/0x4f0 [ 78.027810][ T3661] ret_from_fork+0x1f/0x30 [ 78.042541][ T3661] ------------[ cut here ]------------ [ 78.048253][ T3661] WARNING: CPU: 0 PID: 3661 at lib/ref_tracker.c:120 ref_tracker_free.cold+0x110/0x14e [ 78.062364][ T3661] Modules linked in: [ 78.066424][ T3661] CPU: 0 PID: 3661 Comm: kworker/0:5 Not tainted 5.16.0-rc4-next-20211210-syzkaller #0 [ 78.076075][ T3661] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 78.090648][ T3661] Workqueue: events linkwatch_event [ 78.095890][ T3661] RIP: 0010:ref_tracker_free.cold+0x110/0x14e [ 78.102191][ T3661] Code: ea 03 48 c1 e0 2a 0f b6 04 02 84 c0 74 04 3c 03 7e 4c 8b 7b 18 e8 6b 54 e9 fa e8 26 4d 57 f8 4c 89 ee 48 89 ef e8 fb 33 36 00 <0f> 0b 41 bd ea ff ff ff e9 bd 60 e9 fa 4c 89 f7 e8 16 45 a2 f8 e9 [ 78.127211][ T3661] RSP: 0018:ffffc90002b5fb18 EFLAGS: 00010246 [ 78.133684][ T3661] RAX: 0000000000000000 RBX: ffff88807467f700 RCX: 0000000000000000 [ 78.141928][ T3661] RDX: 0000000000000001 RSI: 0000000000000001 RDI: 0000000000000001 [ 78.150087][ T3661] RBP: ffff888057e105b8 R08: 0000000000000001 R09: ffffffff8ffa1967 [ 78.158211][ T3661] R10: 0000000000000001 R11: 0000000000000000 R12: 1ffff9200056bf65 [ 78.166204][ T3661] R13: 0000000000000292 R14: ffff88807467f718 R15: 00000000c0e0008c [ 78.174321][ T3661] FS: 0000000000000000(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 [ 78.183310][ T3661] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 78.190156][ T3661] CR2: 000000c000208800 CR3: 000000007f7b5000 CR4: 00000000003506f0 [ 78.198235][ T3661] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 78.206214][ T3661] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 78.214328][ T3661] Call Trace: [ 78.217679][ T3661] <TASK> [ 78.220621][ T3661] ? __sanitizer_cov_trace_const_cmp4+0x1c/0x70 [ 78.226981][ T3661] ? nlmsg_notify+0xbe/0x280 [ 78.231607][ T3661] ? ref_tracker_dir_exit+0x330/0x330 [ 78.237654][ T3661] ? linkwatch_do_dev+0x96/0x160 [ 78.242628][ T3661] ? __linkwatch_run_queue+0x233/0x6a0 [ 78.248170][ T3661] ? linkwatch_event+0x4a/0x60 [ 78.252946][ T3661] ? process_one_work+0x9ac/0x1680 [ 78.258136][ T3661] ? worker_thread+0x853/0x11c0 [ 78.263020][ T3661] ? kthread+0x405/0x4f0 [ 78.267905][ T3661] ? ret_from_fork+0x1f/0x30 [ 78.272670][ T3661] ? netdev_state_change+0xa1/0x130 [ 78.278019][ T3661] ? netdev_exit+0xd0/0xd0 [ 78.282466][ T3661] ? dev_activate+0x420/0xa60 [ 78.287261][ T3661] linkwatch_do_dev+0x96/0x160 [ 78.292043][ T3661] __linkwatch_run_queue+0x233/0x6a0 [ 78.297505][ T3661] ? linkwatch_do_dev+0x160/0x160 [ 78.302561][ T3661] linkwatch_event+0x4a/0x60 [ 78.307225][ T3661] process_one_work+0x9ac/0x1680 [ 78.312292][ T3661] ? pwq_dec_nr_in_flight+0x2a0/0x2a0 [ 78.317757][ T3661] ? rwlock_bug.part.0+0x90/0x90 [ 78.322726][ T3661] ? _raw_spin_lock_irq+0x41/0x50 [ 78.327844][ T3661] worker_thread+0x853/0x11c0 [ 78.332543][ T3661] ? process_one_work+0x1680/0x1680 [ 78.338500][ T3661] kthread+0x405/0x4f0 [ 78.342610][ T3661] ? set_kthread_struct+0x130/0x130 Fixes: 63f13937cbe9 ("net: linkwatch: add net device refcount tracker") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Link: https://lore.kernel.org/r/20211214051955.3569843-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-14 13:19:55 +08:00
/* We must release netdev tracker under
* the spinlock protection.
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
linkwatch: linkwatch_forget_dev() to speedup device dismantle Herbert Xu a écrit : > On Tue, Nov 17, 2009 at 04:26:04AM -0800, David Miller wrote: >> Really, the link watch stuff is just due for a redesign. I don't >> think a simple hack is going to cut it this time, sorry Eric :-) > > I have no objections against any redesigns, but since the only > caller of linkwatch_forget_dev runs in process context with the > RTNL, it could also legally emit those events. Thanks guys, here an updated version then, before linkwatch surgery ? In this version, I force the event to be sent synchronously. [PATCH net-next-2.6] linkwatch: linkwatch_forget_dev() to speedup device dismantle time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.266s user 0m0.000s sys 0m0.001s real 0m0.770s user 0m0.000s sys 0m0.000s real 0m1.022s user 0m0.000s sys 0m0.000s One problem of current schem in vlan dismantle phase is the holding of device done by following chain : vlan_dev_stop() -> netif_carrier_off(dev) -> linkwatch_fire_event(dev) -> dev_hold() ... And __linkwatch_run_queue() runs up to one second later... A generic fix to this problem is to add a linkwatch_forget_dev() method to unlink the device from the list of watched devices. dev->link_watch_next becomes dev->link_watch_list (and use a bit more memory), to be able to unlink device in O(1). After patch : time ip link del eth3.103 ; time ip link del eth3.104 ; time ip link del eth3.105 real 0m0.024s user 0m0.000s sys 0m0.000s real 0m0.032s user 0m0.000s sys 0m0.001s real 0m0.033s user 0m0.000s sys 0m0.000s Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2009-11-17 13:59:21 +08:00
}
spin_unlock_irqrestore(&lweventlist_lock, flags);
if (clean)
linkwatch_do_dev(dev);
}
/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
__linkwatch_run_queue(0);
}
static void linkwatch_event(struct work_struct *dummy)
{
rtnl_lock();
__linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies));
rtnl_unlock();
}
void linkwatch_fire_event(struct net_device *dev)
{
bool urgent = linkwatch_urgent_event(dev);
if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
linkwatch_add_event(dev);
} else if (!urgent)
return;
linkwatch_schedule_work(urgent);
}
EXPORT_SYMBOL(linkwatch_fire_event);