md: allow removing faulty rdev during resync
During RAID resync, a faulty rdev cannot be removed: attempting hot
removal fails with a "Device or resource busy" error.
Reproduction steps:
mdadm -Cv /dev/md0 -l1 -n3 -e1.2 /dev/sd{b..d}
mdadm /dev/md0 -f /dev/sdb
mdadm /dev/md0 -r /dev/sdb
-> mdadm: hot remove failed for /dev/sdb: Device or resource busy
After commit 4b10a3bc67 ("md: ensure resync is prioritized over
recovery"), when a device becomes faulty during resync, the
md_choose_sync_action() function returns early without calling
remove_and_add_spares(), preventing faulty device removal.
This patch extracts a helper function remove_spares() from
remove_and_add_spares() and calls it on the resync path of
md_choose_sync_action(), so that faulty devices can be removed during
RAID resync operations.
Fixes: 4b10a3bc67 ("md: ensure resync is prioritized over recovery")
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
Reviewed-by: Li Nan <linan122@huawei.com>
Link: https://lore.kernel.org/linux-raid/20250707075412.150301-1-zhengqixing@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
pull/1309/head
parent
3ec8db61e7
commit
c0ffeb6480
|
|
@@ -9459,17 +9459,11 @@ static bool md_spares_need_change(struct mddev *mddev)
|
|||
return false;
|
||||
}
|
||||
|
||||
static int remove_and_add_spares(struct mddev *mddev,
|
||||
struct md_rdev *this)
|
||||
static int remove_spares(struct mddev *mddev, struct md_rdev *this)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
int spares = 0;
|
||||
int removed = 0;
|
||||
|
||||
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
/* Mustn't remove devices when resync thread is running */
|
||||
return 0;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
|
||||
!mddev->pers->hot_remove_disk(mddev, rdev)) {
|
||||
|
|
@@ -9483,6 +9477,21 @@ static int remove_and_add_spares(struct mddev *mddev,
|
|||
if (removed && mddev->kobj.sd)
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
|
||||
|
||||
return removed;
|
||||
}
|
||||
|
||||
static int remove_and_add_spares(struct mddev *mddev,
|
||||
struct md_rdev *this)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
int spares = 0;
|
||||
int removed = 0;
|
||||
|
||||
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
/* Mustn't remove devices when resync thread is running */
|
||||
return 0;
|
||||
|
||||
removed = remove_spares(mddev, this);
|
||||
if (this && removed)
|
||||
goto no_add;
|
||||
|
||||
|
|
@@ -9525,6 +9534,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
|
|||
|
||||
/* Check if resync is in progress. */
|
||||
if (mddev->recovery_cp < MaxSector) {
|
||||
remove_spares(mddev, NULL);
|
||||
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
return true;
|
||||
|
|
|
|||
Loading…
Reference in New Issue