Merge branch 'mlx5-misc-fixes-2025-09-28'

Tariq Toukan says:

====================
mlx5 misc fixes 2025-09-28

Miscellaneous bug fixes from the team to the mlx5 core driver.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
pull/1354/merge
Jakub Kicinski 2025-09-29 18:50:51 -07:00
commit eb11f02f31
3 changed files with 34 additions and 3 deletions

View File

@ -294,6 +294,10 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
return;
}
cond_resched();
if (mlx5_cmd_is_down(dev)) {
ent->ret = -ENXIO;
return;
}
} while (time_before(jiffies, poll_end));
ent->ret = -ETIMEDOUT;
@ -1070,7 +1074,7 @@ static void cmd_work_handler(struct work_struct *work)
poll_timeout(ent);
/* make sure we read the descriptor after ownership is SW */
rmb();
mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT));
mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, !!ent->ret);
}
}

View File

@ -27,6 +27,7 @@ struct mlx5_fw_reset {
struct work_struct reset_reload_work;
struct work_struct reset_now_work;
struct work_struct reset_abort_work;
struct delayed_work reset_timeout_work;
unsigned long reset_flags;
u8 reset_method;
struct timer_list timer;
@ -259,6 +260,8 @@ static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool
return -EALREADY;
}
if (current_work() != &fw_reset->reset_timeout_work.work)
cancel_delayed_work(&fw_reset->reset_timeout_work);
mlx5_stop_sync_reset_poll(dev);
if (poll_health)
mlx5_start_health_poll(dev);
@ -330,6 +333,11 @@ static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
}
mlx5_stop_health_poll(dev, true);
mlx5_start_sync_reset_poll(dev);
if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
&fw_reset->reset_flags))
schedule_delayed_work(&fw_reset->reset_timeout_work,
msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)));
return 0;
}
@ -739,6 +747,19 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
}
}
/*
 * Handler for fw_reset->reset_timeout_work.
 *
 * Tries to clear the pending sync-reset request (asking the helper to
 * resume health polling). A warning is logged only when that clear
 * succeeds; if the helper returns an error, the handler exits silently.
 */
static void mlx5_sync_reset_timeout_work(struct work_struct *work)
{
	struct mlx5_fw_reset *reset_ctx;
	struct mlx5_core_dev *mdev;
	int err;

	/* 'work' is the work_struct embedded in the delayed work item. */
	reset_ctx = container_of(work, struct mlx5_fw_reset,
				 reset_timeout_work.work);
	mdev = reset_ctx->dev;

	err = mlx5_sync_reset_clear_reset_requested(mdev, true);
	if (!err)
		mlx5_core_warn(mdev, "PCI Sync FW Update Reset Timeout.\n");
}
static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
{
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
@ -822,6 +843,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
cancel_work_sync(&fw_reset->reset_reload_work);
cancel_work_sync(&fw_reset->reset_now_work);
cancel_work_sync(&fw_reset->reset_abort_work);
cancel_delayed_work(&fw_reset->reset_timeout_work);
}
static const struct devlink_param mlx5_fw_reset_devlink_params[] = {
@ -865,6 +887,8 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
INIT_DELAYED_WORK(&fw_reset->reset_timeout_work,
mlx5_sync_reset_timeout_work);
init_completion(&fw_reset->done);
return 0;

View File

@ -489,9 +489,12 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
u32 func_id;
u32 npages;
u32 i = 0;
int err;
if (!mlx5_cmd_is_down(dev))
return mlx5_cmd_do(dev, in, in_size, out, out_size);
err = mlx5_cmd_do(dev, in, in_size, out, out_size);
/* If FW is gone (-ENXIO), proceed to forceful reclaim */
if (err != -ENXIO)
return err;
/* No hard feelings, we want our pages back! */
npages = MLX5_GET(manage_pages_in, in, input_num_entries);