From f53a2ce893b2c7884ef94471f170839170a4eba0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Feb 2022 18:15:47 +0200 Subject: [PATCH 1/7] net: dsa: mv88e6xxx: don't use devres for mdiobus As explained in commits: 74b6d7d13307 ("net: dsa: realtek: register the MDIO bus under devres") 5135e96a3dd2 ("net: dsa: don't allocate the slave_mii_bus using devres") mdiobus_free() will panic when called from devm_mdiobus_free() <- devres_release_all() <- __device_release_driver(), and that mdiobus was not previously unregistered. The mv88e6xxx is an MDIO device, so the initial set of constraints that I thought would cause this (I2C or SPI buses which call ->remove on ->shutdown) do not apply. But there is one more which applies here. If the DSA master itself is on a bus that calls ->remove from ->shutdown (like dpaa2-eth, which is on the fsl-mc bus), there is a device link between the switch and the DSA master, and device_links_unbind_consumers() will unbind the Marvell switch driver on shutdown. systemd-shutdown[1]: Powering off. mv88e6085 0x0000000008b96000:00 sw_gl0: Link is Down fsl-mc dpbp.9: Removing from iommu group 7 fsl-mc dpbp.8: Removing from iommu group 7 ------------[ cut here ]------------ kernel BUG at drivers/net/phy/mdio_bus.c:677! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 1 Comm: systemd-shutdow Not tainted 5.16.5-00040-gdc05f73788e5 #15 pc : mdiobus_free+0x44/0x50 lr : devm_mdiobus_free+0x10/0x20 Call trace: mdiobus_free+0x44/0x50 devm_mdiobus_free+0x10/0x20 devres_release_all+0xa0/0x100 __device_release_driver+0x190/0x220 device_release_driver_internal+0xac/0xb0 device_links_unbind_consumers+0xd4/0x100 __device_release_driver+0x4c/0x220 device_release_driver_internal+0xac/0xb0 device_links_unbind_consumers+0xd4/0x100 __device_release_driver+0x94/0x220 device_release_driver+0x28/0x40 bus_remove_device+0x118/0x124 device_del+0x174/0x420 fsl_mc_device_remove+0x24/0x40 __fsl_mc_device_remove+0xc/0x20 device_for_each_child+0x58/0xa0 dprc_remove+0x90/0xb0 fsl_mc_driver_remove+0x20/0x5c __device_release_driver+0x21c/0x220 device_release_driver+0x28/0x40 bus_remove_device+0x118/0x124 device_del+0x174/0x420 fsl_mc_bus_remove+0x80/0x100 fsl_mc_bus_shutdown+0xc/0x1c platform_shutdown+0x20/0x30 device_shutdown+0x154/0x330 kernel_power_off+0x34/0x6c __do_sys_reboot+0x15c/0x250 __arm64_sys_reboot+0x20/0x30 invoke_syscall.constprop.0+0x4c/0xe0 do_el0_svc+0x4c/0x150 el0_svc+0x24/0xb0 el0t_64_sync_handler+0xa8/0xb0 el0t_64_sync+0x178/0x17c So the same treatment must be applied to all DSA switch drivers, which is: either use devres for both the mdiobus allocation and registration, or don't use devres at all. The Marvell driver already has a good structure for mdiobus removal, so just plug in mdiobus_free and get rid of devres. Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Reported-by: Rafael Richter Signed-off-by: Vladimir Oltean Tested-by: Daniel Klauer Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 58ca684d73f7..659f29582406 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3399,7 +3399,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, return err; } - bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus)); + bus = mdiobus_alloc_size(sizeof(*mdio_bus)); if (!bus) return -ENOMEM; @@ -3424,14 +3424,14 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, if (!external) { err = mv88e6xxx_g2_irq_mdio_setup(chip, bus); if (err) - return err; + goto out; } err = of_mdiobus_register(bus, np); if (err) { dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err); mv88e6xxx_g2_irq_mdio_free(chip, bus); - return err; + goto out; } if (external) @@ -3440,6 +3440,10 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, list_add(&mdio_bus->list, &chip->mdios); return 0; + +out: + mdiobus_free(bus); + return err; } static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip) @@ -3455,6 +3459,7 @@ static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip) mv88e6xxx_g2_irq_mdio_free(chip, bus); mdiobus_unregister(bus); + mdiobus_free(bus); } } From 50facd86e9fbc4b93fe02e5fe05776047f45dbfb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Feb 2022 18:15:48 +0200 Subject: [PATCH 2/7] net: dsa: ar9331: register the mdiobus under devres As explained in commits: 74b6d7d13307 ("net: dsa: realtek: register the MDIO bus under devres") 5135e96a3dd2 ("net: dsa: don't allocate the slave_mii_bus using devres") mdiobus_free() will panic when called from devm_mdiobus_free() <- devres_release_all() <- __device_release_driver(), and that mdiobus was not previously unregistered. The ar9331 is an MDIO device, so the initial set of constraints that I thought would cause this (I2C or SPI buses which call ->remove on ->shutdown) do not apply. But there is one more which applies here. If the DSA master itself is on a bus that calls ->remove from ->shutdown (like dpaa2-eth, which is on the fsl-mc bus), there is a device link between the switch and the DSA master, and device_links_unbind_consumers() will unbind the ar9331 switch driver on shutdown. So the same treatment must be applied to all DSA switch drivers, which is: either use devres for both the mdiobus allocation and registration, or don't use devres at all. The ar9331 driver doesn't have a complex code structure for mdiobus removal, so just replace of_mdiobus_register with the devres variant in order to be all-devres and ensure that we don't free a still-registered bus. Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Oleksij Rempel Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/ar9331.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index da0d7e68643a..c39de2a4c1fe 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -378,7 +378,7 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv) if (!mnp) return -ENODEV; - ret = of_mdiobus_register(mbus, mnp); + ret = devm_of_mdiobus_register(dev, mbus, mnp); of_node_put(mnp); if (ret) return ret; @@ -1091,7 +1091,6 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev) } irq_domain_remove(priv->irqdomain); - mdiobus_unregister(priv->mbus); dsa_unregister_switch(&priv->ds); reset_control_assert(priv->sw_reset); From 08f1a20822349004bb9cc1b153ecb516e9f2889d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Feb 2022 18:15:49 +0200 Subject: [PATCH 3/7] net: dsa: bcm_sf2: don't use devres for mdiobus As explained in commits: 74b6d7d13307 ("net: dsa: realtek: register the MDIO bus under devres") 5135e96a3dd2 ("net: dsa: don't allocate the slave_mii_bus using devres") mdiobus_free() will panic when called from devm_mdiobus_free() <- devres_release_all() <- __device_release_driver(), and that mdiobus was not previously unregistered. The Starfighter 2 is a platform device, so the initial set of constraints that I thought would cause this (I2C or SPI buses which call ->remove on ->shutdown) do not apply. But there is one more which applies here. If the DSA master itself is on a bus that calls ->remove from ->shutdown (like dpaa2-eth, which is on the fsl-mc bus), there is a device link between the switch and the DSA master, and device_links_unbind_consumers() will unbind the bcm_sf2 switch driver on shutdown. So the same treatment must be applied to all DSA switch drivers, which is: either use devres for both the mdiobus allocation and registration, or don't use devres at all. The bcm_sf2 driver has the code structure in place for orderly mdiobus removal, so just replace devm_mdiobus_alloc() with the non-devres variant, and add manual free where necessary, to ensure that we don't let devres free a still-registered bus. Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 33499fcd8848..6afb5db8244c 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -621,7 +621,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; - priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev); + priv->slave_mii_bus = mdiobus_alloc(); if (!priv->slave_mii_bus) { of_node_put(dn); return -ENOMEM; @@ -681,8 +681,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) } err = mdiobus_register(priv->slave_mii_bus); - if (err && dn) + if (err && dn) { + mdiobus_free(priv->slave_mii_bus); of_node_put(dn); + } return err; } @@ -690,6 +692,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { mdiobus_unregister(priv->slave_mii_bus); + mdiobus_free(priv->slave_mii_bus); of_node_put(priv->master_mii_dn); } From 209bdb7ec6a28c7cdf580a0a98afbc9fc3b98932 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Feb 2022 18:15:50 +0200 Subject: [PATCH 4/7] net: dsa: felix: don't use devres for mdiobus As explained in commits: 74b6d7d13307 ("net: dsa: realtek: register the MDIO bus under devres") 5135e96a3dd2 ("net: dsa: don't allocate the slave_mii_bus using devres") mdiobus_free() will panic when called from devm_mdiobus_free() <- devres_release_all() <- __device_release_driver(), and that mdiobus was not previously unregistered. The Felix VSC9959 switch is a PCI device, so the initial set of constraints that I thought would cause this (I2C or SPI buses which call ->remove on ->shutdown) do not apply. But there is one more which applies here. If the DSA master itself is on a bus that calls ->remove from ->shutdown (like dpaa2-eth, which is on the fsl-mc bus), there is a device link between the switch and the DSA master, and device_links_unbind_consumers() will unbind the felix switch driver on shutdown. So the same treatment must be applied to all DSA switch drivers, which is: either use devres for both the mdiobus allocation and registration, or don't use devres at all. The felix driver has the code structure in place for orderly mdiobus removal, so just replace devm_mdiobus_alloc_size() with the non-devres variant, and add manual free where necessary, to ensure that we don't let devres free a still-registered bus. Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index bf8d38239e7e..33f0ceae381d 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1061,7 +1061,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) return PTR_ERR(hw); } - bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); + bus = mdiobus_alloc_size(sizeof(*mdio_priv)); if (!bus) return -ENOMEM; @@ -1081,6 +1081,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) rc = mdiobus_register(bus); if (rc < 0) { dev_err(dev, "failed to register MDIO bus\n"); + mdiobus_free(bus); return rc; } @@ -1132,6 +1133,7 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) lynx_pcs_destroy(phylink_pcs); } mdiobus_unregister(felix->imdio); + mdiobus_free(felix->imdio); } static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, From bd488afc3b39e045ba71aab472233f2a78726e7b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Feb 2022 18:15:51 +0200 Subject: [PATCH 5/7] net: dsa: seville: register the mdiobus under devres As explained in commits: 74b6d7d13307 ("net: dsa: realtek: register the MDIO bus under devres") 5135e96a3dd2 ("net: dsa: don't allocate the slave_mii_bus using devres") mdiobus_free() will panic when called from devm_mdiobus_free() <- devres_release_all() <- __device_release_driver(), and that mdiobus was not previously unregistered. The Seville VSC9959 switch is a platform device, so the initial set of constraints that I thought would cause this (I2C or SPI buses which call ->remove on ->shutdown) do not apply. But there is one more which applies here. If the DSA master itself is on a bus that calls ->remove from ->shutdown (like dpaa2-eth, which is on the fsl-mc bus), there is a device link between the switch and the DSA master, and device_links_unbind_consumers() will unbind the seville switch driver on shutdown. So the same treatment must be applied to all DSA switch drivers, which is: either use devres for both the mdiobus allocation and registration, or don't use devres at all. The seville driver has a code structure that could accommodate both the mdiobus_unregister and mdiobus_free calls, but it has an external dependency upon mscc_miim_setup() from mdio-mscc-miim.c, which calls devm_mdiobus_alloc_size() on its behalf. So rather than restructuring that, and exporting yet one more symbol mscc_miim_teardown(), let's work with devres and replace of_mdiobus_register with the devres variant. When we use all-devres, we can ensure that devres doesn't free a still-registered bus (it either runs both callbacks, or none). Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/seville_vsc9953.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 8c1c9da61602..f2f1608a476c 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1029,7 +1029,7 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot) } /* Needed in order to initialize the bus mutex lock */ - rc = of_mdiobus_register(bus, NULL); + rc = devm_of_mdiobus_register(dev, bus, NULL); if (rc < 0) { dev_err(dev, "failed to register MDIO bus\n"); return rc; @@ -1083,7 +1083,8 @@ static void vsc9953_mdio_bus_free(struct ocelot *ocelot) mdio_device_free(mdio_device); lynx_pcs_destroy(phylink_pcs); } - mdiobus_unregister(felix->imdio); + + /* mdiobus_unregister and mdiobus_free handled by devres */ } static const struct felix_info seville_info_vsc9953 = { From 9ffe3d09e32da45bb5a29cf2e80ec8d7534010c5 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Feb 2022 18:15:52 +0200 Subject: [PATCH 6/7] net: dsa: mt7530: fix kernel bug in mdiobus_free() when unbinding Nobody in this driver calls mdiobus_unregister(), which is necessary if mdiobus_register() completes successfully. So if the devres callbacks that free the mdiobus get invoked (this is the case when unbinding the driver), mdiobus_free() will BUG if the mdiobus is still registered, which it is. My speculation is that this is due to the fact that prior to commit ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") from June 2020, _devm_mdiobus_free() used to call mdiobus_unregister(). But at the time that the mt7530 support was introduced in May 2021, the API was already changed. It's therefore likely that the blamed patch was developed on an older tree, and incorrectly adapted to net-next. This makes the Fixes: tag correct. Fix the problem by using the devres variant of mdiobus_register. Fixes: ba751e28d442 ("net: dsa: mt7530: add interrupt support") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b82512e5b33b..ff3c267d0f26 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2074,7 +2074,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv) if (priv->irq) mt7530_setup_mdio_irq(priv); - ret = mdiobus_register(bus); + ret = devm_mdiobus_register(dev, bus); if (ret) { dev_err(dev, "failed to register MDIO bus: %d\n", ret); if (priv->irq) From 0d120dfb5d67edc5bcd1804e167dba2b30809afd Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 7 Feb 2022 18:15:53 +0200 Subject: [PATCH 7/7] net: dsa: lantiq_gswip: don't use devres for mdiobus As explained in commits: 74b6d7d13307 ("net: dsa: realtek: register the MDIO bus under devres") 5135e96a3dd2 ("net: dsa: don't allocate the slave_mii_bus using devres") mdiobus_free() will panic when called from devm_mdiobus_free() <- devres_release_all() <- __device_release_driver(), and that mdiobus was not previously unregistered. The GSWIP switch is a platform device, so the initial set of constraints that I thought would cause this (I2C or SPI buses which call ->remove on ->shutdown) do not apply. But there is one more which applies here. If the DSA master itself is on a bus that calls ->remove from ->shutdown (like dpaa2-eth, which is on the fsl-mc bus), there is a device link between the switch and the DSA master, and device_links_unbind_consumers() will unbind the GSWIP switch driver on shutdown. So the same treatment must be applied to all DSA switch drivers, which is: either use devres for both the mdiobus allocation and registration, or don't use devres at all. The gswip driver has the code structure in place for orderly mdiobus removal, so just replace devm_mdiobus_alloc() with the non-devres variant, and add manual free where necessary, to ensure that we don't let devres free a still-registered bus. Fixes: ac3a68d56651 ("net: phy: don't abuse devres in devm_mdiobus_register()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 46ed953e787e..320ee7fe91a8 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -498,8 +498,9 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np) { struct dsa_switch *ds = priv->ds; + int err; - ds->slave_mii_bus = devm_mdiobus_alloc(priv->dev); + ds->slave_mii_bus = mdiobus_alloc(); if (!ds->slave_mii_bus) return -ENOMEM; @@ -512,7 +513,11 @@ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np) ds->slave_mii_bus->parent = priv->dev; ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; - return of_mdiobus_register(ds->slave_mii_bus, mdio_np); + err = of_mdiobus_register(ds->slave_mii_bus, mdio_np); + if (err) + mdiobus_free(ds->slave_mii_bus); + + return err; } static int gswip_pce_table_entry_read(struct gswip_priv *priv, @@ -2145,8 +2150,10 @@ disable_switch: gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); dsa_unregister_switch(priv->ds); mdio_bus: - if (mdio_np) + if (mdio_np) { mdiobus_unregister(priv->ds->slave_mii_bus); + mdiobus_free(priv->ds->slave_mii_bus); + } put_mdio_node: of_node_put(mdio_np); for (i = 0; i < priv->num_gphy_fw; i++) @@ -2169,6 +2176,7 @@ static int gswip_remove(struct platform_device *pdev) if (priv->ds->slave_mii_bus) { mdiobus_unregister(priv->ds->slave_mii_bus); + mdiobus_free(priv->ds->slave_mii_bus); of_node_put(priv->ds->slave_mii_bus->dev.of_node); }