s390/pci: Fix cyclic dead-lock in zpci_zdev_put() and zpci_scan_devices()
When triggering PCI device recovery by writing into the SysFS attribute
`recover` of a Physical Function with existing child SR-IOV Virtual
Functions, lockdep is reporting a possible deadlock between three
threads:
Thread (A) Thread (B) Thread (C)
| | |
recover_store() zpci_scan_devices() zpci_scan_devices()
lock(pci_rescan_remove_lock) | |
| | |
| | zpci_bus_scan_busses()
| | lock(zbus_list_lock)
| zpci_add_device() |
| lock(zpci_add_remove_lock) |
| | ┴
| | zpci_bus_scan_bus()
| | lock(pci_rescan_remove_lock)
┴ |
zpci_zdev_put() |
lock(zpci_add_remove_lock) |
┴
zpci_bus_get()
lock(zbus_list_lock)
In zpci_bus_scan_busses() the `zbus_list_lock` is taken for the whole
duration of the function, which also includes taking
`pci_rescan_remove_lock`, among other things. But `zbus_list_lock` only
really needs to protect the modification of the global registration
`zbus_list`, it can be dropped while the functions within the list
iteration run; this way we break the cycle above.
Break up zpci_bus_scan_busses() into an "iterator" zpci_bus_get_next()
that iterates over `zbus_list` element by element, and acquires and
releases `zbus_list_lock` as necessary, but never keep holding it.
References to `zpci_bus` objects are also acquired and released.
The reference counting on `zpci_bus` objects is also changed so that all
put() and get() operations are done under the protection of
`zbus_list_lock`, and if the operation results in a modification of
`zpci_bus_list`, this modification is done in the same critical section
(apart the very first initialization). This way objects are never seen
on the list that are about to be released and/or half-initialized.
Fixes: 14c87ba812 ("s390/pci: separate zbus registration from scanning")
Suggested-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
master
parent
b1aa01d312
commit
4cb92fa763
|
|
@ -748,6 +748,7 @@ ForEachMacros:
|
|||
- 'ynl_attr_for_each_nested'
|
||||
- 'ynl_attr_for_each_payload'
|
||||
- 'zorro_for_each_dev'
|
||||
- 'zpci_bus_for_each'
|
||||
|
||||
IncludeBlocks: Preserve
|
||||
IncludeCategories:
|
||||
|
|
|
|||
|
|
@ -1148,6 +1148,7 @@ static void zpci_add_devices(struct list_head *scan_list)
|
|||
|
||||
int zpci_scan_devices(void)
|
||||
{
|
||||
struct zpci_bus *zbus;
|
||||
LIST_HEAD(scan_list);
|
||||
int rc;
|
||||
|
||||
|
|
@ -1156,7 +1157,10 @@ int zpci_scan_devices(void)
|
|||
return rc;
|
||||
|
||||
zpci_add_devices(&scan_list);
|
||||
zpci_bus_scan_busses();
|
||||
zpci_bus_for_each(zbus) {
|
||||
zpci_bus_scan_bus(zbus);
|
||||
cond_resched();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -153,23 +153,6 @@ int zpci_bus_scan_bus(struct zpci_bus *zbus)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* zpci_bus_scan_busses - Scan all registered busses
|
||||
*
|
||||
* Scan all available zbusses
|
||||
*
|
||||
*/
|
||||
void zpci_bus_scan_busses(void)
|
||||
{
|
||||
struct zpci_bus *zbus = NULL;
|
||||
|
||||
mutex_lock(&zbus_list_lock);
|
||||
list_for_each_entry(zbus, &zbus_list, bus_next) {
|
||||
zpci_bus_scan_bus(zbus);
|
||||
cond_resched();
|
||||
}
|
||||
mutex_unlock(&zbus_list_lock);
|
||||
}
|
||||
|
||||
static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev)
|
||||
{
|
||||
return !s390_pci_no_rid && zdev->rid_available &&
|
||||
|
|
@ -222,10 +205,29 @@ out_free_domain:
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void zpci_bus_release(struct kref *kref)
|
||||
/**
|
||||
* zpci_bus_release - Un-initialize resources associated with the zbus and
|
||||
* free memory
|
||||
* @kref: refcount * that is part of struct zpci_bus
|
||||
*
|
||||
* MUST be called with `zbus_list_lock` held, but the lock is released during
|
||||
* run of the function.
|
||||
*/
|
||||
static inline void zpci_bus_release(struct kref *kref)
|
||||
__releases(&zbus_list_lock)
|
||||
{
|
||||
struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
|
||||
|
||||
lockdep_assert_held(&zbus_list_lock);
|
||||
|
||||
list_del(&zbus->bus_next);
|
||||
mutex_unlock(&zbus_list_lock);
|
||||
|
||||
/*
|
||||
* At this point no-one should see this object, or be able to get a new
|
||||
* reference to it.
|
||||
*/
|
||||
|
||||
if (zbus->bus) {
|
||||
pci_lock_rescan_remove();
|
||||
pci_stop_root_bus(zbus->bus);
|
||||
|
|
@ -237,16 +239,19 @@ static void zpci_bus_release(struct kref *kref)
|
|||
pci_unlock_rescan_remove();
|
||||
}
|
||||
|
||||
mutex_lock(&zbus_list_lock);
|
||||
list_del(&zbus->bus_next);
|
||||
mutex_unlock(&zbus_list_lock);
|
||||
zpci_remove_parent_msi_domain(zbus);
|
||||
kfree(zbus);
|
||||
}
|
||||
|
||||
static void zpci_bus_put(struct zpci_bus *zbus)
|
||||
static inline void __zpci_bus_get(struct zpci_bus *zbus)
|
||||
{
|
||||
kref_put(&zbus->kref, zpci_bus_release);
|
||||
lockdep_assert_held(&zbus_list_lock);
|
||||
kref_get(&zbus->kref);
|
||||
}
|
||||
|
||||
static inline void zpci_bus_put(struct zpci_bus *zbus)
|
||||
{
|
||||
kref_put_mutex(&zbus->kref, zpci_bus_release, &zbus_list_lock);
|
||||
}
|
||||
|
||||
static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
|
||||
|
|
@ -258,7 +263,7 @@ static struct zpci_bus *zpci_bus_get(int topo, bool topo_is_tid)
|
|||
if (!zbus->multifunction)
|
||||
continue;
|
||||
if (topo_is_tid == zbus->topo_is_tid && topo == zbus->topo) {
|
||||
kref_get(&zbus->kref);
|
||||
__zpci_bus_get(zbus);
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
|
@ -268,6 +273,44 @@ out_unlock:
|
|||
return zbus;
|
||||
}
|
||||
|
||||
/**
|
||||
* zpci_bus_get_next - get the next zbus object from given position in the list
|
||||
* @pos: current position/cursor in the global zbus list
|
||||
*
|
||||
* Acquires and releases references as the cursor iterates (might also free/
|
||||
* release the cursor). Is tolerant of concurrent operations on the list.
|
||||
*
|
||||
* To begin the iteration, set *@pos to %NULL before calling the function.
|
||||
*
|
||||
* *@pos is set to %NULL in cases where either the list is empty, or *@pos is
|
||||
* the last element in the list.
|
||||
*
|
||||
* Context: Process context. May sleep.
|
||||
*/
|
||||
void zpci_bus_get_next(struct zpci_bus **pos)
|
||||
{
|
||||
struct zpci_bus *curp = *pos, *next = NULL;
|
||||
|
||||
mutex_lock(&zbus_list_lock);
|
||||
if (curp)
|
||||
next = list_next_entry(curp, bus_next);
|
||||
else
|
||||
next = list_first_entry(&zbus_list, typeof(*curp), bus_next);
|
||||
|
||||
if (list_entry_is_head(next, &zbus_list, bus_next))
|
||||
next = NULL;
|
||||
|
||||
if (next)
|
||||
__zpci_bus_get(next);
|
||||
|
||||
*pos = next;
|
||||
mutex_unlock(&zbus_list_lock);
|
||||
|
||||
/* zpci_bus_put() might drop refcount to 0 and locks zbus_list_lock */
|
||||
if (curp)
|
||||
zpci_bus_put(curp);
|
||||
}
|
||||
|
||||
static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
|
||||
{
|
||||
struct zpci_bus *zbus;
|
||||
|
|
@ -279,9 +322,6 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
|
|||
zbus->topo = topo;
|
||||
zbus->topo_is_tid = topo_is_tid;
|
||||
INIT_LIST_HEAD(&zbus->bus_next);
|
||||
mutex_lock(&zbus_list_lock);
|
||||
list_add_tail(&zbus->bus_next, &zbus_list);
|
||||
mutex_unlock(&zbus_list_lock);
|
||||
|
||||
kref_init(&zbus->kref);
|
||||
INIT_LIST_HEAD(&zbus->resources);
|
||||
|
|
@ -291,6 +331,10 @@ static struct zpci_bus *zpci_bus_alloc(int topo, bool topo_is_tid)
|
|||
zbus->bus_resource.flags = IORESOURCE_BUS;
|
||||
pci_add_resource(&zbus->resources, &zbus->bus_resource);
|
||||
|
||||
mutex_lock(&zbus_list_lock);
|
||||
list_add_tail(&zbus->bus_next, &zbus_list);
|
||||
mutex_unlock(&zbus_list_lock);
|
||||
|
||||
return zbus;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,20 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
|
|||
void zpci_bus_device_unregister(struct zpci_dev *zdev);
|
||||
|
||||
int zpci_bus_scan_bus(struct zpci_bus *zbus);
|
||||
void zpci_bus_scan_busses(void);
|
||||
void zpci_bus_get_next(struct zpci_bus **pos);
|
||||
|
||||
/**
|
||||
* zpci_bus_for_each - iterate over all the registered zbus objects
|
||||
* @pos: a struct zpci_bus * as cursor
|
||||
*
|
||||
* Acquires and releases references as the cursor iterates over the registered
|
||||
* objects. Is tolerant against concurrent removals of objects.
|
||||
*
|
||||
* Context: Process context. May sleep.
|
||||
*/
|
||||
#define zpci_bus_for_each(pos) \
|
||||
for ((pos) = NULL, zpci_bus_get_next(&(pos)); (pos) != NULL; \
|
||||
zpci_bus_get_next(&(pos)))
|
||||
|
||||
int zpci_bus_scan_device(struct zpci_dev *zdev);
|
||||
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
|
||||
|
|
|
|||
Loading…
Reference in New Issue