2019-06-04 16:11:33 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2016-11-17 04:46:13 +08:00
|
|
|
/*
|
|
|
|
* Mediated device Core Driver
|
|
|
|
*
|
|
|
|
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
|
|
|
* Author: Neo Jia <cjia@nvidia.com>
|
|
|
|
* Kirti Wankhede <kwankhede@nvidia.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/uuid.h>
|
|
|
|
#include <linux/sysfs.h>
|
|
|
|
#include <linux/mdev.h>
|
|
|
|
|
|
|
|
#include "mdev_private.h"
|
|
|
|
|
|
|
|
/*
 * Module identification strings.
 * NOTE(review): presumably consumed by MODULE_* declarations outside this
 * view - confirm.
 */
#define DRIVER_VERSION "0.1"
|
|
|
|
#define DRIVER_AUTHOR "NVIDIA Corporation"
|
|
|
|
#define DRIVER_DESC "Mediated device Core Driver"
|
|
|
|
|
|
|
|
/*
 * List of registered parent devices (struct mdev_parent, linked through
 * ->next). Searched by __find_parent_device(), extended by
 * mdev_register_device() and shrunk by mdev_unregister_device().
 */
static LIST_HEAD(parent_list);
|
|
|
|
/* Held around every lookup, insertion and removal on parent_list. */
static DEFINE_MUTEX(parent_list_lock);
|
|
|
|
/*
 * "mdev_bus" compatibility class; registered by the first successful pass
 * through mdev_register_device() and reused by later registrations.
 */
static struct class_compat *mdev_bus_compat_class;
|
|
|
|
|
2016-12-30 23:13:33 +08:00
|
|
|
/*
 * List of mediated devices (struct mdev_device, linked through ->next).
 * mdev_device_create() walks it to reject a duplicate UUID and
 * mdev_device_release() unlinks entries.
 */
static LIST_HEAD(mdev_list);
|
|
|
|
/* Held around every walk, insertion and removal on mdev_list. */
static DEFINE_MUTEX(mdev_list_lock);
|
|
|
|
|
2016-12-30 23:13:41 +08:00
|
|
|
struct device *mdev_parent_dev(struct mdev_device *mdev)
|
|
|
|
{
|
2021-04-07 03:40:33 +08:00
|
|
|
return mdev->type->parent->dev;
|
2016-12-30 23:13:41 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(mdev_parent_dev);
|
|
|
|
|
2021-04-07 03:40:34 +08:00
|
|
|
/*
|
|
|
|
* Return the index in supported_type_groups that this mdev_device was created
|
|
|
|
* from.
|
|
|
|
*/
|
|
|
|
unsigned int mdev_get_type_group_id(struct mdev_device *mdev)
|
|
|
|
{
|
|
|
|
return mdev->type->type_group_id;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(mdev_get_type_group_id);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Used in mdev_type_attribute sysfs functions to return the index in the
|
|
|
|
* supported_type_groups that the sysfs is called from.
|
|
|
|
*/
|
vfio/mdev: Correct the function signatures for the mdev_type_attributes
The driver core standard is to pass in the properly typed object, the
properly typed attribute and the buffer data. It stems from the root
kobject method:
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,..)
Each subclass of kobject should provide their own function with the same
signature but more specific types, eg struct device uses:
ssize_t (*show)(struct device *dev, struct device_attribute *attr,..)
In this case the existing signature is:
ssize_t (*show)(struct kobject *kobj, struct device *dev,..)
Where kobj is a 'struct mdev_type *' and dev is 'mdev_type->parent->dev'.
Change the mdev_type related sysfs attribute functions to:
ssize_t (*show)(struct mdev_type *mtype, struct mdev_type_attribute *attr,..)
In order to restore type safety and match the driver core standard
There are no current users of 'attr', but if it is ever needed it would be
hard to add in retroactively, so do it now.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Message-Id: <18-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2021-04-07 03:40:41 +08:00
|
|
|
unsigned int mtype_get_type_group_id(struct mdev_type *mtype)
|
2021-04-07 03:40:34 +08:00
|
|
|
{
|
vfio/mdev: Correct the function signatures for the mdev_type_attributes
The driver core standard is to pass in the properly typed object, the
properly typed attribute and the buffer data. It stems from the root
kobject method:
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,..)
Each subclass of kobject should provide their own function with the same
signature but more specific types, eg struct device uses:
ssize_t (*show)(struct device *dev, struct device_attribute *attr,..)
In this case the existing signature is:
ssize_t (*show)(struct kobject *kobj, struct device *dev,..)
Where kobj is a 'struct mdev_type *' and dev is 'mdev_type->parent->dev'.
Change the mdev_type related sysfs attribute functions to:
ssize_t (*show)(struct mdev_type *mtype, struct mdev_type_attribute *attr,..)
In order to restore type safety and match the driver core standard
There are no current users of 'attr', but if it is ever needed it would be
hard to add in retroactively, so do it now.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Message-Id: <18-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2021-04-07 03:40:41 +08:00
|
|
|
return mtype->type_group_id;
|
2021-04-07 03:40:34 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(mtype_get_type_group_id);
|
|
|
|
|
vfio/mdev: Correct the function signatures for the mdev_type_attributes
The driver core standard is to pass in the properly typed object, the
properly typed attribute and the buffer data. It stems from the root
kobject method:
ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,..)
Each subclass of kobject should provide their own function with the same
signature but more specific types, eg struct device uses:
ssize_t (*show)(struct device *dev, struct device_attribute *attr,..)
In this case the existing signature is:
ssize_t (*show)(struct kobject *kobj, struct device *dev,..)
Where kobj is a 'struct mdev_type *' and dev is 'mdev_type->parent->dev'.
Change the mdev_type related sysfs attribute functions to:
ssize_t (*show)(struct mdev_type *mtype, struct mdev_type_attribute *attr,..)
In order to restore type safety and match the driver core standard
There are no current users of 'attr', but if it is ever needed it would be
hard to add in retroactively, so do it now.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Message-Id: <18-v2-d36939638fc6+d54-vfio2_jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2021-04-07 03:40:41 +08:00
|
|
|
/*
|
|
|
|
* Used in mdev_type_attribute sysfs functions to return the parent struct
|
|
|
|
* device
|
|
|
|
*/
|
|
|
|
struct device *mtype_get_parent_dev(struct mdev_type *mtype)
|
|
|
|
{
|
|
|
|
return mtype->parent->dev;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(mtype_get_parent_dev);
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
/* Should be called holding parent_list_lock */
|
2016-12-30 23:13:38 +08:00
|
|
|
static struct mdev_parent *__find_parent_device(struct device *dev)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2016-12-30 23:13:38 +08:00
|
|
|
struct mdev_parent *parent;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
list_for_each_entry(parent, &parent_list, next) {
|
|
|
|
if (parent->dev == dev)
|
|
|
|
return parent;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-04-07 03:40:29 +08:00
|
|
|
void mdev_release_parent(struct kref *kref)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2016-12-30 23:13:38 +08:00
|
|
|
struct mdev_parent *parent = container_of(kref, struct mdev_parent,
|
|
|
|
ref);
|
2016-11-17 04:46:13 +08:00
|
|
|
struct device *dev = parent->dev;
|
|
|
|
|
|
|
|
kfree(parent);
|
|
|
|
put_device(dev);
|
|
|
|
}
|
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
/* Caller must hold parent unreg_sem read or write lock */
|
|
|
|
static void mdev_device_remove_common(struct mdev_device *mdev)
|
|
|
|
{
|
2021-04-07 03:40:33 +08:00
|
|
|
struct mdev_parent *parent = mdev->type->parent;
|
2019-06-07 00:52:33 +08:00
|
|
|
int ret;
|
|
|
|
|
2021-04-07 03:40:28 +08:00
|
|
|
mdev_remove_sysfs_files(mdev);
|
2019-06-07 00:52:33 +08:00
|
|
|
device_del(&mdev->dev);
|
|
|
|
lockdep_assert_held(&parent->unreg_sem);
|
2021-06-17 22:22:15 +08:00
|
|
|
if (parent->ops->remove) {
|
|
|
|
ret = parent->ops->remove(mdev);
|
|
|
|
if (ret)
|
|
|
|
dev_err(&mdev->dev, "Remove failed: err=%d\n", ret);
|
|
|
|
}
|
2019-06-07 00:52:33 +08:00
|
|
|
|
|
|
|
/* Balances with device_initialize() */
|
|
|
|
put_device(&mdev->dev);
|
|
|
|
}
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
/*
 * device_for_each_child() callback used by mdev_unregister_device().
 *
 * Removes @dev when mdev_from_dev() yields a mediated device for it and
 * skips it otherwise. @data is unused. Always returns 0.
 */
static int mdev_device_remove_cb(struct device *dev, void *data)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (!mdev)
		return 0;

	mdev_device_remove_common(mdev);
	return 0;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mdev_register_device : Register a device
|
|
|
|
* @dev: device structure representing parent device.
|
|
|
|
* @ops: Parent device operation structure to be registered.
|
|
|
|
*
|
|
|
|
* Add device to list of registered parent devices.
|
|
|
|
* Returns a negative value on error, otherwise 0.
|
|
|
|
*/
|
2016-12-30 23:13:38 +08:00
|
|
|
int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
|
|
|
int ret;
|
2016-12-30 23:13:38 +08:00
|
|
|
struct mdev_parent *parent;
|
2019-07-12 03:26:52 +08:00
|
|
|
char *env_string = "MDEV_STATE=registered";
|
|
|
|
char *envp[] = { env_string, NULL };
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
/* check for mandatory ops */
|
2021-06-17 22:22:15 +08:00
|
|
|
if (!ops || !ops->supported_type_groups)
|
|
|
|
return -EINVAL;
|
|
|
|
if (!ops->device_driver && (!ops->create || !ops->remove))
|
2016-11-17 04:46:13 +08:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
dev = get_device(dev);
|
|
|
|
if (!dev)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
mutex_lock(&parent_list_lock);
|
|
|
|
|
|
|
|
/* Check for duplicate */
|
|
|
|
parent = __find_parent_device(dev);
|
|
|
|
if (parent) {
|
2019-05-01 06:49:28 +08:00
|
|
|
parent = NULL;
|
2016-11-17 04:46:13 +08:00
|
|
|
ret = -EEXIST;
|
|
|
|
goto add_dev_err;
|
|
|
|
}
|
|
|
|
|
|
|
|
parent = kzalloc(sizeof(*parent), GFP_KERNEL);
|
|
|
|
if (!parent) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto add_dev_err;
|
|
|
|
}
|
|
|
|
|
|
|
|
kref_init(&parent->ref);
|
2019-06-07 00:52:33 +08:00
|
|
|
init_rwsem(&parent->unreg_sem);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
parent->dev = dev;
|
|
|
|
parent->ops = ops;
|
|
|
|
|
|
|
|
if (!mdev_bus_compat_class) {
|
|
|
|
mdev_bus_compat_class = class_compat_register("mdev_bus");
|
|
|
|
if (!mdev_bus_compat_class) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto add_dev_err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = parent_create_sysfs_files(parent);
|
|
|
|
if (ret)
|
|
|
|
goto add_dev_err;
|
|
|
|
|
|
|
|
ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
|
|
|
|
if (ret)
|
|
|
|
dev_warn(dev, "Failed to create compatibility class link\n");
|
|
|
|
|
|
|
|
list_add(&parent->next, &parent_list);
|
|
|
|
mutex_unlock(&parent_list_lock);
|
|
|
|
|
|
|
|
dev_info(dev, "MDEV: Registered\n");
|
2019-07-12 03:26:52 +08:00
|
|
|
kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
add_dev_err:
|
|
|
|
mutex_unlock(&parent_list_lock);
|
|
|
|
if (parent)
|
|
|
|
mdev_put_parent(parent);
|
|
|
|
else
|
|
|
|
put_device(dev);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(mdev_register_device);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* mdev_unregister_device : Unregister a parent device
|
|
|
|
* @dev: device structure representing parent device.
|
|
|
|
*
|
|
|
|
* Remove device from list of registered parent devices. Give a chance to free
|
|
|
|
* existing mediated devices for given device.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void mdev_unregister_device(struct device *dev)
|
|
|
|
{
|
2016-12-30 23:13:38 +08:00
|
|
|
struct mdev_parent *parent;
|
2019-07-12 03:26:52 +08:00
|
|
|
char *env_string = "MDEV_STATE=unregistered";
|
|
|
|
char *envp[] = { env_string, NULL };
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
mutex_lock(&parent_list_lock);
|
|
|
|
parent = __find_parent_device(dev);
|
|
|
|
|
|
|
|
if (!parent) {
|
|
|
|
mutex_unlock(&parent_list_lock);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
dev_info(dev, "MDEV: Unregistering\n");
|
|
|
|
|
|
|
|
list_del(&parent->next);
|
2019-06-07 00:52:33 +08:00
|
|
|
mutex_unlock(&parent_list_lock);
|
|
|
|
|
|
|
|
down_write(&parent->unreg_sem);
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
|
|
|
|
|
2019-05-01 06:49:33 +08:00
|
|
|
device_for_each_child(dev, NULL, mdev_device_remove_cb);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
parent_remove_sysfs_files(parent);
|
2019-06-07 00:52:33 +08:00
|
|
|
up_write(&parent->unreg_sem);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
mdev_put_parent(parent);
|
2019-07-12 03:26:52 +08:00
|
|
|
|
|
|
|
/* We still have the caller's reference to use for the uevent */
|
|
|
|
kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(mdev_unregister_device);
|
|
|
|
|
2021-04-07 03:40:31 +08:00
|
|
|
static void mdev_device_release(struct device *dev)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2021-04-07 03:40:31 +08:00
|
|
|
struct mdev_device *mdev = to_mdev_device(dev);
|
|
|
|
|
|
|
|
/* Pairs with the get in mdev_device_create() */
|
2021-04-07 03:40:33 +08:00
|
|
|
kobject_put(&mdev->type->kobj);
|
2021-04-07 03:40:31 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_lock(&mdev_list_lock);
|
|
|
|
list_del(&mdev->next);
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
|
2016-11-17 04:46:13 +08:00
|
|
|
dev_dbg(&mdev->dev, "MDEV: destroying\n");
|
|
|
|
kfree(mdev);
|
|
|
|
}
|
|
|
|
|
2021-04-07 03:40:28 +08:00
|
|
|
int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
|
|
|
int ret;
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
struct mdev_device *mdev, *tmp;
|
2021-04-07 03:40:29 +08:00
|
|
|
struct mdev_parent *parent = type->parent;
|
2021-06-17 22:22:15 +08:00
|
|
|
struct mdev_driver *drv = parent->ops->device_driver;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_lock(&mdev_list_lock);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
|
|
|
/* Check for duplicate */
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
list_for_each_entry(tmp, &mdev_list, next) {
|
2019-01-11 03:00:27 +08:00
|
|
|
if (guid_equal(&tmp->uuid, uuid)) {
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_unlock(&mdev_list_lock);
|
2021-04-07 03:40:31 +08:00
|
|
|
return -EEXIST;
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
}
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
|
|
|
|
if (!mdev) {
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not, the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mutex_unlock(&mdev_list_lock);
|
2021-04-07 03:40:31 +08:00
|
|
|
return -ENOMEM;
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
2021-04-07 03:40:31 +08:00
|
|
|
device_initialize(&mdev->dev);
|
|
|
|
mdev->dev.parent = parent->dev;
|
|
|
|
mdev->dev.bus = &mdev_bus_type;
|
|
|
|
mdev->dev.release = mdev_device_release;
|
|
|
|
mdev->dev.groups = parent->ops->mdev_attr_groups;
|
|
|
|
mdev->type = type;
|
|
|
|
/* Pairs with the put in mdev_device_release() */
|
2021-04-07 03:40:33 +08:00
|
|
|
kobject_get(&type->kobj);
|
2021-04-07 03:40:31 +08:00
|
|
|
|
2019-01-11 03:00:27 +08:00
|
|
|
guid_copy(&mdev->uuid, uuid);
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not; the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
list_add(&mdev->next, &mdev_list);
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
|
2021-04-07 03:40:32 +08:00
|
|
|
ret = dev_set_name(&mdev->dev, "%pUl", uuid);
|
|
|
|
if (ret)
|
|
|
|
goto out_put_device;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
/* Check if parent unregistration has started */
|
|
|
|
if (!down_read_trylock(&parent->unreg_sem)) {
|
|
|
|
ret = -ENODEV;
|
2021-04-07 03:40:31 +08:00
|
|
|
goto out_put_device;
|
2019-06-07 00:52:33 +08:00
|
|
|
}
|
|
|
|
|
2021-06-17 22:22:15 +08:00
|
|
|
if (parent->ops->create) {
|
|
|
|
ret = parent->ops->create(mdev);
|
|
|
|
if (ret)
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
2016-11-17 04:46:13 +08:00
|
|
|
|
vfio/mdev: Improve the create/remove sequence
This patch addresses the two issues below and prepares the code to address
the third issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver that wishes to bind to mdev driver also
gets an initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure that the new sequence is solid, the stack dump below was taken of
a process that attempts to remove the device while the device is in
use by the vfio driver and a user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
ret = device_add(&mdev->dev);
|
2016-11-17 04:46:13 +08:00
|
|
|
if (ret)
|
2021-04-07 03:40:31 +08:00
|
|
|
goto out_remove;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2021-06-17 22:22:15 +08:00
|
|
|
if (!drv)
|
|
|
|
drv = &vfio_mdev_driver;
|
|
|
|
ret = device_driver_attach(&drv->driver, &mdev->dev);
|
|
|
|
if (ret)
|
|
|
|
goto out_del;
|
|
|
|
|
2021-04-07 03:40:28 +08:00
|
|
|
ret = mdev_create_sysfs_files(mdev);
|
vfio/mdev: Improve the create/remove sequence
This patch addresses the two issues below and prepares the code to address
the third issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver that wishes to bind to mdev driver also
gets an initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure that the new sequence is solid, the stack dump below was taken of
a process that attempts to remove the device while the device is in
use by the vfio driver and a user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
if (ret)
|
2021-04-07 03:40:31 +08:00
|
|
|
goto out_del;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not; the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mdev->active = true;
|
2016-11-17 04:46:13 +08:00
|
|
|
dev_dbg(&mdev->dev, "MDEV: created\n");
|
2019-06-07 00:52:33 +08:00
|
|
|
up_read(&parent->unreg_sem);
|
2016-11-17 04:46:13 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not; the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
return 0;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2021-04-07 03:40:31 +08:00
|
|
|
out_del:
|
vfio/mdev: Improve the create/remove sequence
This patch addresses the two issues below and prepares the code to address
the third issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver that wishes to bind to mdev driver also
gets an initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure that the new sequence is solid, the stack dump below was taken of
a process that attempts to remove the device while the device is in
use by the vfio driver and a user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
device_del(&mdev->dev);
|
2021-04-07 03:40:31 +08:00
|
|
|
out_remove:
|
2021-06-17 22:22:15 +08:00
|
|
|
if (parent->ops->remove)
|
|
|
|
parent->ops->remove(mdev);
|
2021-04-07 03:40:31 +08:00
|
|
|
out_unlock:
|
2019-06-07 00:52:33 +08:00
|
|
|
up_read(&parent->unreg_sem);
|
2021-04-07 03:40:31 +08:00
|
|
|
out_put_device:
|
vfio/mdev: Improve the create/remove sequence
This patch addresses the two issues below and prepares the code to address
the third issue listed below.
1. mdev device is placed on the mdev bus before it is created in the
vendor driver. Once a device is placed on the mdev bus without creating
its supporting underlying vendor device, mdev driver's probe() gets
triggered. However there isn't a stable mdev available to work on.
create_store()
mdev_create_device()
device_register()
...
vfio_mdev_probe()
[...]
parent->ops->create()
vfio_ap_mdev_create()
mdev_set_drvdata(mdev, matrix_mdev);
/* Valid pointer set above */
Due to this way of initialization, mdev driver who wants to use the mdev,
doesn't have a valid mdev to work on.
2. Current creation sequence is,
parent->ops_create()
groups_register()
Remove sequence is,
parent->ops->remove()
groups_unregister()
However, remove sequence should be exact mirror of creation sequence.
Once this is achieved, all users of the mdev will be terminated first
before removing underlying vendor device.
(Follow standard linux driver model).
At that point vendor's remove() ops shouldn't fail because taking the
device off the bus should terminate any usage.
3. When remove operation fails, mdev sysfs removal attempts to add the
file back on already removed device. Following call trace [1] is observed.
[1] call trace:
kernel: WARNING: CPU: 2 PID: 9348 at fs/sysfs/file.c:327 sysfs_create_file_ns+0x7f/0x90
kernel: CPU: 2 PID: 9348 Comm: bash Kdump: loaded Not tainted 5.1.0-rc6-vdevbus+ #6
kernel: Hardware name: Supermicro SYS-6028U-TR4+/X10DRU-i+, BIOS 2.0b 08/09/2016
kernel: RIP: 0010:sysfs_create_file_ns+0x7f/0x90
kernel: Call Trace:
kernel: remove_store+0xdc/0x100 [mdev]
kernel: kernfs_fop_write+0x113/0x1a0
kernel: vfs_write+0xad/0x1b0
kernel: ksys_write+0x5a/0xe0
kernel: do_syscall_64+0x5a/0x210
kernel: entry_SYSCALL_64_after_hwframe+0x49/0xbe
Therefore, mdev core is improved in following ways.
1. Split the device registration/deregistration sequence so that some
things can be done between initialization of the device and hooking it
up to the bus respectively after deregistering it from the bus but
before giving up our final reference.
In particular, this means invoking the ->create() and ->remove()
callbacks in those new windows. This gives the vendor driver an
initialized mdev device to work with during creation.
At the same time, a bus driver that wishes to bind to mdev driver also
gets an initialized mdev device.
This follows standard Linux kernel bus and device model.
2. During remove flow, first remove the device from the bus. This
ensures that any bus specific devices are removed.
Once device is taken off the mdev bus, invoke remove() of mdev
from the vendor driver.
3. The driver core device model provides way to register and auto
unregister the device sysfs attribute groups at dev->groups.
Make use of dev->groups to let core create the groups and eliminate
code to avoid explicit groups creation and removal.
To ensure that the new sequence is solid, the stack dump below was taken of
a process that attempts to remove the device while the device is in
use by the vfio driver and a user application.
This stack dump validates that vfio driver guards against such device
removal when device is in use.
cat /proc/21962/stack
[<0>] vfio_del_group_dev+0x216/0x3c0 [vfio]
[<0>] mdev_remove+0x21/0x40 [mdev]
[<0>] device_release_driver_internal+0xe8/0x1b0
[<0>] bus_remove_device+0xf9/0x170
[<0>] device_del+0x168/0x350
[<0>] mdev_device_remove_common+0x1d/0x50 [mdev]
[<0>] mdev_device_remove+0x8c/0xd0 [mdev]
[<0>] remove_store+0x71/0x90 [mdev]
[<0>] kernfs_fop_write+0x113/0x1a0
[<0>] vfs_write+0xad/0x1b0
[<0>] ksys_write+0x5a/0xe0
[<0>] do_syscall_64+0x5a/0x210
[<0>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[<0>] 0xffffffffffffffff
This prepares the code to eliminate calling device_create_file() in
subsequent patch.
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2019-06-07 00:52:32 +08:00
|
|
|
put_device(&mdev->dev);
|
2016-11-17 04:46:13 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-04-07 03:40:26 +08:00
|
|
|
int mdev_device_remove(struct mdev_device *mdev)
|
2016-11-17 04:46:13 +08:00
|
|
|
{
|
2021-04-07 03:40:26 +08:00
|
|
|
struct mdev_device *tmp;
|
2021-04-07 03:40:33 +08:00
|
|
|
struct mdev_parent *parent = mdev->type->parent;
|
2016-11-17 04:46:13 +08:00
|
|
|
|
2016-12-30 23:13:33 +08:00
|
|
|
mutex_lock(&mdev_list_lock);
|
|
|
|
list_for_each_entry(tmp, &mdev_list, next) {
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not; the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
if (tmp == mdev)
|
2016-12-30 23:13:33 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not; the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
if (tmp != mdev) {
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
2016-12-30 23:13:33 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not; the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
if (!mdev->active) {
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
|
|
|
return -EAGAIN;
|
|
|
|
}
|
2016-12-30 23:13:33 +08:00
|
|
|
|
vfio/mdev: Check globally for duplicate devices
When we create an mdev device, we check for duplicates against the
parent device and return -EEXIST if found, but the mdev device
namespace is global since we'll link all devices from the bus. We do
catch this later in sysfs_do_create_link_sd() to return -EEXIST, but
with it comes a kernel warning and stack trace for trying to create
duplicate sysfs links, which makes it an undesirable response.
Therefore we should really be looking for duplicates across all mdev
parent devices, or as implemented here, against our mdev device list.
Using mdev_list to prevent duplicates means that we can remove
mdev_parent.lock, but in order not to serialize mdev device creation
and removal globally, we add mdev_device.active which allows UUIDs to
be reserved such that we can drop the mdev_list_lock before the mdev
device is fully in place.
Two behavioral notes; first, mdev_parent.lock had the side-effect of
serializing mdev create and remove ops per parent device. This was
an implementation detail, not an intentional guarantee provided to
the mdev vendor drivers. Vendor drivers can trivially provide this
serialization internally if necessary. Second, review comments note
the new -EAGAIN behavior when the device, and in particular the remove
attribute, becomes visible in sysfs. If a remove is triggered prior
to completion of mdev_device_create() the user will see a -EAGAIN
error. While the errno is different, receiving an error during this
period is not; the previous implementation returned -ENODEV for the
same condition. Furthermore, the consistency to the user is improved
in the case where mdev_device_remove_ops() returns an error. Previously
concurrent calls to mdev_device_remove() could see the device
disappear with -ENODEV and return in the case of error. Now a user
would see -EAGAIN while the device is in this transitory state.
Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2018-05-16 03:53:55 +08:00
|
|
|
mdev->active = false;
|
|
|
|
mutex_unlock(&mdev_list_lock);
|
2016-12-30 23:13:33 +08:00
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
/* Check if parent unregistration has started */
|
|
|
|
if (!down_read_trylock(&parent->unreg_sem))
|
|
|
|
return -ENODEV;
|
2016-12-30 23:13:33 +08:00
|
|
|
|
2019-06-07 00:52:33 +08:00
|
|
|
mdev_device_remove_common(mdev);
|
|
|
|
up_read(&parent->unreg_sem);
|
2016-12-30 23:13:33 +08:00
|
|
|
return 0;
|
2016-11-17 04:46:13 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Module initialisation: register the mdev bus, then the vfio_mdev driver.
 *
 * If registering the driver fails, the bus registered in the first step is
 * unregistered again before the error is handed back to the caller.
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * mdev_bus_register() or mdev_register_driver().
 */
static int __init mdev_init(void)
{
	int err = mdev_bus_register();

	if (err)
		return err;

	err = mdev_register_driver(&vfio_mdev_driver);
	if (err)
		mdev_bus_unregister();	/* undo the bus registration */

	return err;
}
|
|
|
|
|
|
|
|
/*
 * Module teardown: unregister the vfio_mdev driver, unregister the
 * compatibility class if one has been set up, and finally unregister the
 * mdev bus.
 */
static void __exit mdev_exit(void)
{
	struct class_compat *compat;

	mdev_unregister_driver(&vfio_mdev_driver);

	/* The compat class pointer may still be NULL; only unregister if set. */
	compat = mdev_bus_compat_class;
	if (compat)
		class_compat_unregister(compat);

	mdev_bus_unregister();
}
|
|
|
|
|
2021-07-26 22:35:23 +08:00
|
|
|
/*
 * Entry points: mdev_init() is registered through subsys_initcall() and
 * mdev_exit() through module_exit().
 */
subsys_initcall(mdev_init)
|
2016-11-17 04:46:13 +08:00
|
|
|
module_exit(mdev_exit)
|
|
|
|
|
|
|
|
/*
 * Module metadata; DRIVER_VERSION, DRIVER_AUTHOR and DRIVER_DESC are the
 * string macros defined near the top of this file.
 */
MODULE_VERSION(DRIVER_VERSION);
|
|
|
|
MODULE_LICENSE("GPL v2");
|
|
|
|
MODULE_AUTHOR(DRIVER_AUTHOR);
|
|
|
|
MODULE_DESCRIPTION(DRIVER_DESC);
|