bnxt_en: bring back rtnl_lock() in the bnxt_open() path
Error recovery, PCIe AER, resume, and TX timeout will invoke bnxt_open()
with netdev_lock only. This will cause an RTNL assertion failure in
netif_set_real_num_tx_queues().
Example error recovery assert:
RTNL: assertion failed at net/core/dev.c (3178)
WARNING: CPU: 3 PID: 3392 at net/core/dev.c:3178 netif_set_real_num_tx_queues+0x1fd/0x210
Call Trace:
<TASK>
? __pfx_bnxt_msix+0x10/0x10 [bnxt_en]
__bnxt_open_nic+0x1ef/0xb20 [bnxt_en]
bnxt_open+0xda/0x130 [bnxt_en]
bnxt_fw_reset_task+0x21f/0x780 [bnxt_en]
process_scheduled_works+0x9d/0x400
For now, bring back rtnl_lock() in all these code paths that can invoke
bnxt_open(). In the bnxt_queue_start() error path, we don't have
rtnl_lock held so we just change it to call netif_close() instead of
bnxt_reset_task() for simplicity. This error path is unlikely so it
should be fine.
Fixes: 004b500801 ("eth: bnxt: remove most dependencies on RTNL")
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20250514062908.2766677-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
pull/1231/merge
parent
92ec485503
commit
325eb217e4
|
|
@ -14013,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp)
|
|||
netdev_unlock(bp->dev);
|
||||
}
|
||||
|
||||
/* Same as bnxt_lock_sp() with additional rtnl_lock.
 *
 * Clears BNXT_STATE_IN_SP_TASK in bp->state, then acquires rtnl_lock
 * followed by the netdev lock of bp->dev.  Pair every call with
 * bnxt_rtnl_unlock_sp(), which undoes these steps.
 *
 * NOTE(review): the flag is presumably cleared before blocking on the
 * locks for the same reason as in bnxt_lock_sp() - confirm against that
 * function, whose body is not visible here.
 */
|
||||
static void bnxt_rtnl_lock_sp(struct bnxt *bp)
|
||||
{
|
||||
clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
|
||||
rtnl_lock();
|
||||
netdev_lock(bp->dev);
|
||||
}
|
||||
|
||||
/* Counterpart of bnxt_rtnl_lock_sp(): sets BNXT_STATE_IN_SP_TASK in
 * bp->state again, then releases the netdev lock of bp->dev and finally
 * rtnl_lock, i.e. the locks are dropped in the reverse order in which
 * bnxt_rtnl_lock_sp() acquired them.
 */
static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
|
||||
{
|
||||
set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
|
||||
netdev_unlock(bp->dev);
|
||||
rtnl_unlock();
|
||||
}
|
||||
|
||||
/* Only called from bnxt_sp_task() */
|
||||
static void bnxt_reset(struct bnxt *bp, bool silent)
|
||||
{
|
||||
bnxt_lock_sp(bp);
|
||||
bnxt_rtnl_lock_sp(bp);
|
||||
if (test_bit(BNXT_STATE_OPEN, &bp->state))
|
||||
bnxt_reset_task(bp, silent);
|
||||
bnxt_unlock_sp(bp);
|
||||
bnxt_rtnl_unlock_sp(bp);
|
||||
}
|
||||
|
||||
/* Only called from bnxt_sp_task() */
|
||||
|
|
@ -14027,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
|
|||
{
|
||||
int i;
|
||||
|
||||
bnxt_lock_sp(bp);
|
||||
bnxt_rtnl_lock_sp(bp);
|
||||
if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
|
||||
bnxt_unlock_sp(bp);
|
||||
bnxt_rtnl_unlock_sp(bp);
|
||||
return;
|
||||
}
|
||||
/* Disable and flush TPA before resetting the RX ring */
|
||||
|
|
@ -14068,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
|
|||
}
|
||||
if (bp->flags & BNXT_FLAG_TPA)
|
||||
bnxt_set_tpa(bp, true);
|
||||
bnxt_unlock_sp(bp);
|
||||
bnxt_rtnl_unlock_sp(bp);
|
||||
}
|
||||
|
||||
static void bnxt_fw_fatal_close(struct bnxt *bp)
|
||||
|
|
@ -14960,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work)
|
|||
bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
|
||||
fallthrough;
|
||||
case BNXT_FW_RESET_STATE_OPENING:
|
||||
while (!netdev_trylock(bp->dev)) {
|
||||
while (!rtnl_trylock()) {
|
||||
bnxt_queue_fw_reset_work(bp, HZ / 10);
|
||||
return;
|
||||
}
|
||||
netdev_lock(bp->dev);
|
||||
rc = bnxt_open(bp->dev);
|
||||
if (rc) {
|
||||
netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
|
||||
bnxt_fw_reset_abort(bp, rc);
|
||||
netdev_unlock(bp->dev);
|
||||
rtnl_unlock();
|
||||
goto ulp_start;
|
||||
}
|
||||
|
||||
|
|
@ -14988,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
|
|||
bnxt_dl_health_fw_status_update(bp, true);
|
||||
}
|
||||
netdev_unlock(bp->dev);
|
||||
rtnl_unlock();
|
||||
bnxt_ulp_start(bp, 0);
|
||||
bnxt_reenable_sriov(bp);
|
||||
netdev_lock(bp->dev);
|
||||
|
|
@ -15936,7 +15954,7 @@ err_reset:
|
|||
rc);
|
||||
napi_enable_locked(&bnapi->napi);
|
||||
bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
|
||||
bnxt_reset_task(bp, true);
|
||||
netif_close(dev);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
|
@ -16752,6 +16770,7 @@ static int bnxt_resume(struct device *device)
|
|||
struct bnxt *bp = netdev_priv(dev);
|
||||
int rc = 0;
|
||||
|
||||
rtnl_lock();
|
||||
netdev_lock(dev);
|
||||
rc = pci_enable_device(bp->pdev);
|
||||
if (rc) {
|
||||
|
|
@ -16796,6 +16815,7 @@ static int bnxt_resume(struct device *device)
|
|||
|
||||
resume_exit:
|
||||
netdev_unlock(bp->dev);
|
||||
rtnl_unlock();
|
||||
bnxt_ulp_start(bp, rc);
|
||||
if (!rc)
|
||||
bnxt_reenable_sriov(bp);
|
||||
|
|
@ -16961,6 +16981,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
|
|||
int err;
|
||||
|
||||
netdev_info(bp->dev, "PCI Slot Resume\n");
|
||||
rtnl_lock();
|
||||
netdev_lock(netdev);
|
||||
|
||||
err = bnxt_hwrm_func_qcaps(bp);
|
||||
|
|
@ -16978,6 +16999,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
|
|||
netif_device_attach(netdev);
|
||||
|
||||
netdev_unlock(netdev);
|
||||
rtnl_unlock();
|
||||
bnxt_ulp_start(bp, err);
|
||||
if (!err)
|
||||
bnxt_reenable_sriov(bp);
|
||||
|
|
|
|||
Loading…
Reference in New Issue