bnxt_en: bring back rtnl_lock() in the bnxt_open() path

Error recovery, PCIe AER, resume, and TX timeout will invoke bnxt_open()
with netdev_lock only.  This will cause RTNL assert failure in
netif_set_real_num_tx_queues(), netif_set_real_num_tx_queues(),
and netif_set_real_num_tx_queues().

Example error recovery assert:

RTNL: assertion failed at net/core/dev.c (3178)
WARNING: CPU: 3 PID: 3392 at net/core/dev.c:3178 netif_set_real_num_tx_queues+0x1fd/0x210

Call Trace:
 <TASK>
 ? __pfx_bnxt_msix+0x10/0x10 [bnxt_en]
 __bnxt_open_nic+0x1ef/0xb20 [bnxt_en]
 bnxt_open+0xda/0x130 [bnxt_en]
 bnxt_fw_reset_task+0x21f/0x780 [bnxt_en]
 process_scheduled_works+0x9d/0x400

For now, bring back rtnl_lock() in all these code paths that can invoke
bnxt_open().  In the bnxt_queue_start() error path, we don't have
rtnl_lock held so we just change it to call netif_close() instead of
bnxt_reset_task() for simplicity.  This error path is unlikely so it
should be fine.

Fixes: 004b500801 ("eth: bnxt: remove most dependencies on RTNL")
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20250514062908.2766677-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
pull/1231/merge
Michael Chan 2025-05-13 23:29:08 -07:00 committed by Jakub Kicinski
parent 92ec485503
commit 325eb217e4
1 changed files with 29 additions and 7 deletions

View File

@ -14013,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp)
netdev_unlock(bp->dev);
}
/* Same as bnxt_lock_sp() with additional rtnl_lock */
static void bnxt_rtnl_lock_sp(struct bnxt *bp)
{
clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
rtnl_lock();
netdev_lock(bp->dev);
}
static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
{
set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
netdev_unlock(bp->dev);
rtnl_unlock();
}
/* Only called from bnxt_sp_task() */
static void bnxt_reset(struct bnxt *bp, bool silent)
{
bnxt_lock_sp(bp);
bnxt_rtnl_lock_sp(bp);
if (test_bit(BNXT_STATE_OPEN, &bp->state))
bnxt_reset_task(bp, silent);
bnxt_unlock_sp(bp);
bnxt_rtnl_unlock_sp(bp);
}
/* Only called from bnxt_sp_task() */
@ -14027,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
{
int i;
bnxt_lock_sp(bp);
bnxt_rtnl_lock_sp(bp);
if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
bnxt_unlock_sp(bp);
bnxt_rtnl_unlock_sp(bp);
return;
}
/* Disable and flush TPA before resetting the RX ring */
@ -14068,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
}
if (bp->flags & BNXT_FLAG_TPA)
bnxt_set_tpa(bp, true);
bnxt_unlock_sp(bp);
bnxt_rtnl_unlock_sp(bp);
}
static void bnxt_fw_fatal_close(struct bnxt *bp)
@ -14960,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
fallthrough;
case BNXT_FW_RESET_STATE_OPENING:
while (!netdev_trylock(bp->dev)) {
while (!rtnl_trylock()) {
bnxt_queue_fw_reset_work(bp, HZ / 10);
return;
}
netdev_lock(bp->dev);
rc = bnxt_open(bp->dev);
if (rc) {
netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
bnxt_fw_reset_abort(bp, rc);
netdev_unlock(bp->dev);
rtnl_unlock();
goto ulp_start;
}
@ -14988,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_dl_health_fw_status_update(bp, true);
}
netdev_unlock(bp->dev);
rtnl_unlock();
bnxt_ulp_start(bp, 0);
bnxt_reenable_sriov(bp);
netdev_lock(bp->dev);
@ -15936,7 +15954,7 @@ err_reset:
rc);
napi_enable_locked(&bnapi->napi);
bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
bnxt_reset_task(bp, true);
netif_close(dev);
return rc;
}
@ -16752,6 +16770,7 @@ static int bnxt_resume(struct device *device)
struct bnxt *bp = netdev_priv(dev);
int rc = 0;
rtnl_lock();
netdev_lock(dev);
rc = pci_enable_device(bp->pdev);
if (rc) {
@ -16796,6 +16815,7 @@ static int bnxt_resume(struct device *device)
resume_exit:
netdev_unlock(bp->dev);
rtnl_unlock();
bnxt_ulp_start(bp, rc);
if (!rc)
bnxt_reenable_sriov(bp);
@ -16961,6 +16981,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
int err;
netdev_info(bp->dev, "PCI Slot Resume\n");
rtnl_lock();
netdev_lock(netdev);
err = bnxt_hwrm_func_qcaps(bp);
@ -16978,6 +16999,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
netif_device_attach(netdev);
netdev_unlock(netdev);
rtnl_unlock();
bnxt_ulp_start(bp, err);
if (!err)
bnxt_reenable_sriov(bp);