vfio: Delete vfio_get/put_group from vfio_iommu_group_notifier()

iommu_group_register_notifier()/iommu_group_unregister_notifier() are
built using a blocking_notifier_chain which integrates a rwsem. The
notifier function cannot be running outside its registration.

When considering how the notifier function interacts with create/destroy
of the group there are two fringe cases, the notifier starts before
list_add(&vfio.group_list) and the notifier runs after the kref
becomes 0.

Prior to vfio_create_group() unlocking and returning we have
   container_users == 0
   device_list == empty
And this cannot change until the mutex is unlocked.

After the kref goes to zero we must also have
   container_users == 0
   device_list == empty

Both are required because they are balanced operations and a 0 kref means
some caller became unbalanced. Add the missing assertion that
container_users must be zero as well.

These two facts are important because when checking each operation we see:

- IOMMU_GROUP_NOTIFY_ADD_DEVICE
   Empty device_list avoids the WARN_ON in vfio_group_nb_add_dev()
   0 container_users ends the call
- IOMMU_GROUP_NOTIFY_BOUND_DRIVER
   0 container_users ends the call

Finally, we have IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER, which only deletes
items from the unbound list. During creation this list is empty, during
kref == 0 nothing can read this list, and it will be freed soon.

Since the vfio_group_release() doesn't hold the appropriate lock to
manipulate the unbound_list and could race with the notifier, move the
cleanup to directly before the kfree.

This allows deleting all of the deferred group put code.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/1-v3-2fdfe4ca2cc6+18c-vfio_group_cdev_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Jason Gunthorpe 2021-10-15 08:40:50 -03:00 committed by Alex Williamson
parent 48f06ca420
commit 63b150fde7
1 changed files with 15 additions and 85 deletions

View File

@ -324,12 +324,16 @@ static void vfio_container_put(struct vfio_container *container)
static void vfio_group_unlock_and_free(struct vfio_group *group)
{
struct vfio_unbound_dev *unbound, *tmp;
mutex_unlock(&vfio.group_lock);
/*
* Unregister outside of lock. A spurious callback is harmless now
* that the group is no longer in vfio.group_list.
*/
iommu_group_unregister_notifier(group->iommu_group, &group->nb);
list_for_each_entry_safe(unbound, tmp,
&group->unbound_list, unbound_next) {
list_del(&unbound->unbound_next);
kfree(unbound);
}
kfree(group);
}
@ -360,14 +364,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
group->nb.notifier_call = vfio_iommu_group_notifier;
/*
* blocking notifiers acquire a rwsem around registering and hold
* it around callback. Therefore, need to register outside of
* vfio.group_lock to avoid A-B/B-A contention. Our callback won't
* do anything unless it can find the group in vfio.group_list, so
* no harm in registering early.
*/
ret = iommu_group_register_notifier(iommu_group, &group->nb);
if (ret) {
kfree(group);
@ -415,18 +411,18 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
static void vfio_group_release(struct kref *kref)
{
struct vfio_group *group = container_of(kref, struct vfio_group, kref);
struct vfio_unbound_dev *unbound, *tmp;
struct iommu_group *iommu_group = group->iommu_group;
/*
* These data structures all have paired operations that can only be
* undone when the caller holds a live reference on the group. Since all
* pairs must be undone these WARN_ON's indicate some caller did not
* properly hold the group reference.
*/
WARN_ON(!list_empty(&group->device_list));
WARN_ON(atomic_read(&group->container_users));
WARN_ON(group->notifier.head);
list_for_each_entry_safe(unbound, tmp,
&group->unbound_list, unbound_next) {
list_del(&unbound->unbound_next);
kfree(unbound);
}
device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
list_del(&group->vfio_next);
vfio_free_group_minor(group->minor);
@ -439,61 +435,12 @@ static void vfio_group_put(struct vfio_group *group)
kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}
struct vfio_group_put_work {
struct work_struct work;
struct vfio_group *group;
};
static void vfio_group_put_bg(struct work_struct *work)
{
struct vfio_group_put_work *do_work;
do_work = container_of(work, struct vfio_group_put_work, work);
vfio_group_put(do_work->group);
kfree(do_work);
}
static void vfio_group_schedule_put(struct vfio_group *group)
{
struct vfio_group_put_work *do_work;
do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
if (WARN_ON(!do_work))
return;
INIT_WORK(&do_work->work, vfio_group_put_bg);
do_work->group = group;
schedule_work(&do_work->work);
}
/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
kref_get(&group->kref);
}
/*
* Not really a try as we will sleep for mutex, but we need to make
* sure the group pointer is valid under lock and get a reference.
*/
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
struct vfio_group *target = group;
mutex_lock(&vfio.group_lock);
list_for_each_entry(group, &vfio.group_list, vfio_next) {
if (group == target) {
vfio_group_get(group);
mutex_unlock(&vfio.group_lock);
return group;
}
}
mutex_unlock(&vfio.group_lock);
return NULL;
}
static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
@ -691,14 +638,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
struct device *dev = data;
struct vfio_unbound_dev *unbound;
/*
* Need to go through a group_lock lookup to get a reference or we
* risk racing a group being removed. Ignore spurious notifies.
*/
group = vfio_group_try_get(group);
if (!group)
return NOTIFY_OK;
switch (action) {
case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
vfio_group_nb_add_dev(group, dev);
@ -749,15 +688,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
mutex_unlock(&group->unbound_lock);
break;
}
/*
* If we're the last reference to the group, the group will be
* released, which includes unregistering the iommu group notifier.
* We hold a read-lock on that notifier list, unregistering needs
* a write-lock... deadlock. Release our reference asynchronously
* to avoid that situation.
*/
vfio_group_schedule_put(group);
return NOTIFY_OK;
}