From 4f3549d72d1b5c90ecc7e673402f38f4486d22c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 May 2013 22:15:29 +0200 Subject: [PATCH 01/18] Driver core: Add offline/online device operations In some cases, graceful hot-removal of devices is not possible, although in principle the devices in question support hotplug. For example, that may happen for the last CPU in the system or for memory modules holding kernel memory. In those cases it is nice to be able to check if the given device can be gracefully hot-removed before triggering a removal procedure that cannot be aborted or reversed. Unfortunately, however, the kernel currently doesn't provide any support for that. To address that deficiency, introduce support for offline and online operations that can be performed on devices, respectively, before a hot-removal and in case when it is necessary (or convenient) to put a device back online after a successful offline (that has not been followed by removal). The idea is that the offline will fail whenever the given device cannot be gracefully removed from the system and it will not be allowed to use the device after a successful offline (until a subsequent online) in analogy with the existing CPU offline/online mechanism. For now, the offline and online operations are introduced at the bus type level, as that should be sufficient for the most urgent use cases (CPUs and memory modules). In the future, however, the approach may be extended to cover some more complicated device offline/online scenarios involving device drivers etc. The lock_device_hotplug() and unlock_device_hotplug() functions are introduced because subsequent patches need to put larger pieces of code under device_hotplug_lock to prevent race conditions between device offline and removal from happening. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Toshi Kani --- .../ABI/testing/sysfs-devices-online | 20 +++ drivers/base/core.c | 130 ++++++++++++++++++ include/linux/device.h | 21 +++ 3 files changed, 171 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-devices-online diff --git a/Documentation/ABI/testing/sysfs-devices-online b/Documentation/ABI/testing/sysfs-devices-online new file mode 100644 index 000000000000..f990026c0740 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-online @@ -0,0 +1,20 @@ +What: /sys/devices/.../online +Date: April 2013 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../online attribute is only present for + devices whose bus types provide .online() and .offline() + callbacks. The number read from it (0 or 1) is the negation + of the device's 'offline' field. If that number is 1 and '0' + (or 'n', or 'N') is written to this file, the device bus type's + .offline() callback is executed for the device and (if + successful) its 'offline' field is updated accordingly. In + turn, if that number is 0 and '1' (or 'y', or 'Y') is written to + this file, the device bus type's .online() callback is executed + for the device and (if successful) its 'offline' field is + updated as appropriate. + + After a successful execution of the bus type's .offline() + callback the device cannot be used for any purpose until either + it is removed (i.e. device_del() is called for it), or its bus + type's .online() is executed successfully. 
diff --git a/drivers/base/core.c b/drivers/base/core.c index 016312437577..60c975686089 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -403,6 +403,36 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr, static struct device_attribute uevent_attr = __ATTR(uevent, S_IRUGO | S_IWUSR, show_uevent, store_uevent); +static ssize_t show_online(struct device *dev, struct device_attribute *attr, + char *buf) +{ + bool val; + + lock_device_hotplug(); + val = !dev->offline; + unlock_device_hotplug(); + return sprintf(buf, "%u\n", val); +} + +static ssize_t store_online(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + bool val; + int ret; + + ret = strtobool(buf, &val); + if (ret < 0) + return ret; + + lock_device_hotplug(); + ret = val ? device_online(dev) : device_offline(dev); + unlock_device_hotplug(); + return ret < 0 ? ret : count; +} + +static struct device_attribute online_attr = + __ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online); + static int device_add_attributes(struct device *dev, struct device_attribute *attrs) { @@ -516,6 +546,12 @@ static int device_add_attrs(struct device *dev) if (error) goto err_remove_type_groups; + if (device_supports_offline(dev) && !dev->offline_disabled) { + error = device_create_file(dev, &online_attr); + if (error) + goto err_remove_type_groups; + } + return 0; err_remove_type_groups: @@ -536,6 +572,7 @@ static void device_remove_attrs(struct device *dev) struct class *class = dev->class; const struct device_type *type = dev->type; + device_remove_file(dev, &online_attr); device_remove_groups(dev, dev->groups); if (type) @@ -1431,6 +1468,99 @@ EXPORT_SYMBOL_GPL(put_device); EXPORT_SYMBOL_GPL(device_create_file); EXPORT_SYMBOL_GPL(device_remove_file); +static DEFINE_MUTEX(device_hotplug_lock); + +void lock_device_hotplug(void) +{ + mutex_lock(&device_hotplug_lock); +} + +void unlock_device_hotplug(void) +{ + mutex_unlock(&device_hotplug_lock); 
+} + +static int device_check_offline(struct device *dev, void *not_used) +{ + int ret; + + ret = device_for_each_child(dev, NULL, device_check_offline); + if (ret) + return ret; + + return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0; +} + +/** + * device_offline - Prepare the device for hot-removal. + * @dev: Device to be put offline. + * + * Execute the device bus type's .offline() callback, if present, to prepare + * the device for a subsequent hot-removal. If that succeeds, the device must + * not be used until either it is removed or its bus type's .online() callback + * is executed. + * + * Call under device_hotplug_lock. + */ +int device_offline(struct device *dev) +{ + int ret; + + if (dev->offline_disabled) + return -EPERM; + + ret = device_for_each_child(dev, NULL, device_check_offline); + if (ret) + return ret; + + device_lock(dev); + if (device_supports_offline(dev)) { + if (dev->offline) { + ret = 1; + } else { + ret = dev->bus->offline(dev); + if (!ret) { + kobject_uevent(&dev->kobj, KOBJ_OFFLINE); + dev->offline = true; + } + } + } + device_unlock(dev); + + return ret; +} + +/** + * device_online - Put the device back online after successful device_offline(). + * @dev: Device to be put back online. + * + * If device_offline() has been successfully executed for @dev, but the device + * has not been removed subsequently, execute its bus type's .online() callback + * to indicate that the device can be used again. + * + * Call under device_hotplug_lock. 
+ */ +int device_online(struct device *dev) +{ + int ret = 0; + + device_lock(dev); + if (device_supports_offline(dev)) { + if (dev->offline) { + ret = dev->bus->online(dev); + if (!ret) { + kobject_uevent(&dev->kobj, KOBJ_ONLINE); + dev->offline = false; + } + } else { + ret = 1; + } + } + device_unlock(dev); + + return ret; +} + struct root_device { struct device dev; struct module *owner; diff --git a/include/linux/device.h b/include/linux/device.h index c0a126125325..eeb33315514c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -71,6 +71,10 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * the specific driver's probe to initial the matched device. * @remove: Called when a device removed from this bus. * @shutdown: Called at shut-down time to quiesce the device. + * + * @online: Called to put the device back online (after offlining it). + * @offline: Called to put the device offline for hot-removal. May fail. + * * @suspend: Called when a device on this bus wants to go to sleep mode. * @resume: Called to bring a device on this bus out of sleep mode. * @pm: Power management operations of this bus, callback the specific @@ -104,6 +108,9 @@ struct bus_type { int (*remove)(struct device *dev); void (*shutdown)(struct device *dev); + int (*online)(struct device *dev); + int (*offline)(struct device *dev); + int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); @@ -648,6 +655,8 @@ struct acpi_dev_node { * @release: Callback to free the device after all references have * gone away. This should be set by the allocator of the * device (i.e. the bus driver that discovered the device). + * @offline_disabled: If set, the device is permanently online. + * @offline: Set after successful invocation of bus type's .offline(). * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. 
The device structure contains the information @@ -720,6 +729,9 @@ struct device { void (*release)(struct device *dev); struct iommu_group *iommu_group; + + bool offline_disabled:1; + bool offline:1; }; static inline struct device *kobj_to_dev(struct kobject *kobj) @@ -856,6 +868,15 @@ extern const char *device_get_devnode(struct device *dev, extern void *dev_get_drvdata(const struct device *dev); extern int dev_set_drvdata(struct device *dev, void *data); +static inline bool device_supports_offline(struct device *dev) +{ + return dev->bus && dev->bus->offline && dev->bus->online; +} + +extern void lock_device_hotplug(void); +extern void unlock_device_hotplug(void); +extern int device_offline(struct device *dev); +extern int device_online(struct device *dev); /* * Root device objects for grouping under /sys/devices */ From 0902a9044fa5b7a0456ea4daacec2c2b3189ba8c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 May 2013 00:25:49 +0200 Subject: [PATCH 02/18] Driver core: Use generic offline/online for CPU offline/online Rework the CPU hotplug code in drivers/base/cpu.c to use the generic offline/online support introduced previously instead of its own CPU-specific code. For this purpose, modify cpu_subsys to provide offline and online callbacks for CONFIG_HOTPLUG_CPU set and remove the code handling the CPU-specific 'online' sysfs attribute. This modification is not supposed to change the user-observable behavior of the kernel (i.e. the 'online' attribute will be present in exactly the same place in sysfs and should trigger exactly the same actions as before). Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Toshi Kani --- drivers/base/cpu.c | 87 +++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 56 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 3d48fc887ef4..25c8768172e9 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -16,12 +16,6 @@ #include "base.h" -struct bus_type cpu_subsys = { - .name = "cpu", - .dev_name = "cpu", -}; -EXPORT_SYMBOL_GPL(cpu_subsys); - static DEFINE_PER_CPU(struct device *, cpu_sys_devices); #ifdef CONFIG_HOTPLUG_CPU @@ -34,69 +28,45 @@ static void change_cpu_under_node(struct cpu *cpu, cpu->node_id = to_nid; } -static ssize_t show_online(struct device *dev, - struct device_attribute *attr, - char *buf) +static int __ref cpu_subsys_online(struct device *dev) { struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%u\n", !!cpu_online(cpu->dev.id)); -} - -static ssize_t __ref store_online(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cpu *cpu = container_of(dev, struct cpu, dev); - int cpuid = cpu->dev.id; + int cpuid = dev->id; int from_nid, to_nid; - ssize_t ret; + int ret; cpu_hotplug_driver_lock(); - switch (buf[0]) { - case '0': - ret = cpu_down(cpuid); - if (!ret) - kobject_uevent(&dev->kobj, KOBJ_OFFLINE); - break; - case '1': - from_nid = cpu_to_node(cpuid); - ret = cpu_up(cpuid); - /* - * When hot adding memory to memoryless node and enabling a cpu - * on the node, node number of the cpu may internally change. - */ - to_nid = cpu_to_node(cpuid); - if (from_nid != to_nid) - change_cpu_under_node(cpu, from_nid, to_nid); + from_nid = cpu_to_node(cpuid); + ret = cpu_up(cpuid); + /* + * When hot adding memory to memoryless node and enabling a cpu + * on the node, node number of the cpu may internally change. 
+ */ + to_nid = cpu_to_node(cpuid); + if (from_nid != to_nid) + change_cpu_under_node(cpu, from_nid, to_nid); - if (!ret) - kobject_uevent(&dev->kobj, KOBJ_ONLINE); - break; - default: - ret = -EINVAL; - } cpu_hotplug_driver_unlock(); - - if (ret >= 0) - ret = count; return ret; } -static DEVICE_ATTR(online, 0644, show_online, store_online); -static void __cpuinit register_cpu_control(struct cpu *cpu) +static int cpu_subsys_offline(struct device *dev) { - device_create_file(&cpu->dev, &dev_attr_online); + int ret; + + cpu_hotplug_driver_lock(); + ret = cpu_down(dev->id); + cpu_hotplug_driver_unlock(); + return ret; } + void unregister_cpu(struct cpu *cpu) { int logical_cpu = cpu->dev.id; unregister_cpu_under_node(logical_cpu, cpu_to_node(logical_cpu)); - device_remove_file(&cpu->dev, &dev_attr_online); - device_unregister(&cpu->dev); per_cpu(cpu_sys_devices, logical_cpu) = NULL; return; @@ -123,12 +93,18 @@ static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store); static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ -#else /* ... 
!CONFIG_HOTPLUG_CPU */ -static inline void register_cpu_control(struct cpu *cpu) -{ -} #endif /* CONFIG_HOTPLUG_CPU */ +struct bus_type cpu_subsys = { + .name = "cpu", + .dev_name = "cpu", +#ifdef CONFIG_HOTPLUG_CPU + .online = cpu_subsys_online, + .offline = cpu_subsys_offline, +#endif +}; +EXPORT_SYMBOL_GPL(cpu_subsys); + #ifdef CONFIG_KEXEC #include @@ -277,12 +253,11 @@ int __cpuinit register_cpu(struct cpu *cpu, int num) cpu->dev.id = num; cpu->dev.bus = &cpu_subsys; cpu->dev.release = cpu_device_release; + cpu->dev.offline_disabled = !cpu->hotpluggable; #ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE cpu->dev.bus->uevent = arch_cpu_uevent; #endif error = device_register(&cpu->dev); - if (!error && cpu->hotpluggable) - register_cpu_control(cpu); if (!error) per_cpu(cpu_sys_devices, num) = &cpu->dev; if (!error) From 683058e315f00a216fd6c79df4f63bc9945ca434 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 May 2013 00:26:16 +0200 Subject: [PATCH 03/18] ACPI / hotplug: Use device offline/online for graceful hot-removal Modify the generic ACPI hotplug code to be able to check if devices scheduled for hot-removal may be gracefully removed from the system using the device offline/online mechanism introduced previously. Namely, make acpi_scan_hot_remove() handling device hot-removal call device_offline() for all physical companions of the ACPI device nodes involved in the operation and check the results. If any of the device_offline() calls fails, the function will not progress to the removal phase (which cannot be aborted), unless its (new) force argument is set (in case of a failing offline it will put the devices offlined by it back online). In support of 'forced' device hot-removal, add a new sysfs attribute 'force_remove' that will reside under /sys/firmware/acpi/hotplug/. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Toshi Kani --- Documentation/ABI/testing/sysfs-firmware-acpi | 10 +++ drivers/acpi/internal.h | 2 + drivers/acpi/scan.c | 84 +++++++++++++++++++ drivers/acpi/sysfs.c | 31 +++++++ include/acpi/acpi_bus.h | 1 + 5 files changed, 128 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi index ce9bee98b43b..b4436cca97a8 100644 --- a/Documentation/ABI/testing/sysfs-firmware-acpi +++ b/Documentation/ABI/testing/sysfs-firmware-acpi @@ -44,6 +44,16 @@ Description: or 0 (unset). Attempts to write any other values to it will cause -EINVAL to be returned. +What: /sys/firmware/acpi/hotplug/force_remove +Date: May 2013 +Contact: Rafael J. Wysocki +Description: + The number in this file (0 or 1) determines whether (1) or not + (0) the ACPI subsystem will allow devices to be hot-removed even + if they cannot be put offline gracefully (from the kernel's + viewpoint). That number can be changed by writing a boolean + value to this file. + What: /sys/firmware/acpi/interrupts/ Date: February 2008 Contact: Len Brown diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 6f1afd9118c8..4548f0a114ce 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -47,6 +47,8 @@ void acpi_memory_hotplug_init(void); static inline void acpi_memory_hotplug_init(void) {} #endif +extern bool acpi_force_hot_remove; + void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug, const char *name); int acpi_scan_add_handler_with_hotplug(struct acpi_scan_handler *handler, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index fe158fd4f1df..4fd392005ef1 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -27,6 +27,12 @@ extern struct acpi_device *acpi_root; #define ACPI_IS_ROOT_DEVICE(device) (!(device)->parent) +/* + * If set, devices will be hot-removed even if they cannot be put offline + * gracefully (from the kernel's standpoint). 
+ */ +bool acpi_force_hot_remove; + static const char *dummy_hid = "device"; static LIST_HEAD(acpi_device_list); @@ -120,6 +126,59 @@ acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, cha } static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL); +static acpi_status acpi_bus_offline_companions(acpi_handle handle, u32 lvl, + void *data, void **ret_p) +{ + struct acpi_device *device = NULL; + struct acpi_device_physical_node *pn; + acpi_status status = AE_OK; + + if (acpi_bus_get_device(handle, &device)) + return AE_OK; + + mutex_lock(&device->physical_node_lock); + + list_for_each_entry(pn, &device->physical_node_list, node) { + int ret; + + ret = device_offline(pn->dev); + if (acpi_force_hot_remove) + continue; + + if (ret < 0) { + status = AE_ERROR; + break; + } + pn->put_online = !ret; + } + + mutex_unlock(&device->physical_node_lock); + + return status; +} + +static acpi_status acpi_bus_online_companions(acpi_handle handle, u32 lvl, + void *data, void **ret_p) +{ + struct acpi_device *device = NULL; + struct acpi_device_physical_node *pn; + + if (acpi_bus_get_device(handle, &device)) + return AE_OK; + + mutex_lock(&device->physical_node_lock); + + list_for_each_entry(pn, &device->physical_node_list, node) + if (pn->put_online) { + device_online(pn->dev); + pn->put_online = false; + } + + mutex_unlock(&device->physical_node_lock); + + return AE_OK; +} + static int acpi_scan_hot_remove(struct acpi_device *device) { acpi_handle handle = device->handle; @@ -136,10 +195,33 @@ static int acpi_scan_hot_remove(struct acpi_device *device) return -EINVAL; } + lock_device_hotplug(); + + status = acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, + NULL, acpi_bus_offline_companions, NULL, + NULL); + if (ACPI_SUCCESS(status) || acpi_force_hot_remove) + status = acpi_bus_offline_companions(handle, 0, NULL, NULL); + + if (ACPI_FAILURE(status) && !acpi_force_hot_remove) { + acpi_bus_online_companions(handle, 0, NULL, NULL); + 
acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, + acpi_bus_online_companions, NULL, NULL, + NULL); + + unlock_device_hotplug(); + + put_device(&device->dev); + return -EBUSY; + } + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Hot-removing device %s...\n", dev_name(&device->dev))); acpi_bus_trim(device); + + unlock_device_hotplug(); + /* Device node has been unregistered. */ put_device(&device->dev); device = NULL; @@ -236,6 +318,7 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source) int error; mutex_lock(&acpi_scan_lock); + lock_device_hotplug(); acpi_bus_get_device(handle, &device); if (device) { @@ -259,6 +342,7 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source) kobject_uevent(&device->dev.kobj, KOBJ_ONLINE); out: + unlock_device_hotplug(); acpi_evaluate_hotplug_ost(handle, ost_source, ost_code, NULL); mutex_unlock(&acpi_scan_lock); } diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index fcae5fa2e1b3..5c5d1624fa2c 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -780,6 +780,33 @@ void acpi_sysfs_add_hotplug_profile(struct acpi_hotplug_profile *hotplug, pr_err(PREFIX "Unable to add hotplug profile '%s'\n", name); } +static ssize_t force_remove_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", !!acpi_force_hot_remove); +} + +static ssize_t force_remove_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t size) +{ + bool val; + int ret; + + ret = strtobool(buf, &val); + if (ret < 0) + return ret; + + lock_device_hotplug(); + acpi_force_hot_remove = val; + unlock_device_hotplug(); + return size; +} + +static const struct kobj_attribute force_remove_attr = + __ATTR(force_remove, S_IRUGO | S_IWUSR, force_remove_show, + force_remove_store); + int __init acpi_sysfs_init(void) { int result; @@ -789,6 +816,10 @@ int __init acpi_sysfs_init(void) return result; hotplug_kobj = kobject_create_and_add("hotplug", 
acpi_kobj); + result = sysfs_create_file(hotplug_kobj, &force_remove_attr.attr); + if (result) + return result; + result = sysfs_create_file(acpi_kobj, &pm_profile_attr.attr); return result; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 98db31d9f9b4..4d5d3e7ba33d 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -286,6 +286,7 @@ struct acpi_device_physical_node { u8 node_id; struct list_head node; struct device *dev; + bool put_online:1; }; /* set maximum of physical nodes to 32 for expansibility */ From ac212b6980d8d5eda705864fc5a8ecddc6d6eacc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 May 2013 00:26:22 +0200 Subject: [PATCH 04/18] ACPI / processor: Use common hotplug infrastructure Split the ACPI processor driver into two parts, one that is non-modular, resides in the ACPI core and handles the enumeration and hotplug of processors and one that implements the rest of the existing processor driver functionality. The non-modular part uses an ACPI scan handler object to enumerate processors on the basis of information provided by the ACPI namespace and to hook up with the common ACPI hotplug infrastructure. It also populates the ACPI handle of each processor device having a corresponding object in the ACPI namespace, which allows the driver proper to bind to those devices, and makes the driver bind to them if it is readily available (i.e. loaded) when the scan handler's .attach() routine is running. There are a few reasons to make this change. First, switching the ACPI processor driver to using the common ACPI hotplug infrastructure reduces code duplication and size considerably, even though a new file is created along with a header comment etc. 
Second, since the common hotplug code attempts to offline devices before starting the (non-reversible) removal procedure, it will abort (and possibly roll back) hot-remove operations involving processors if cpu_down() returns an error code for one of them instead of continuing them blindly (if /sys/firmware/acpi/hotplug/force_remove is unset). That is a more desirable behavior than what the current code does. Finally, the separation of the scan/hotplug part from the driver proper makes it possible to simplify the driver's .remove() routine, because it doesn't need to worry about the possible cleanup related to processor removal any more (the scan/hotplug part is responsible for that now) and can handle device removal and driver removal symmetrically (i.e. as appropriate). Some user-visible changes in sysfs are made (for example, the 'sysdev' link from the ACPI device node to the processor device's directory is gone and a 'physical_node' link is present instead and a corresponding 'firmware_node' is present in the processor device's directory, the processor driver is now visible under /sys/bus/cpu/drivers/ and bound to the processor device), but that shouldn't affect the functionality that users care about (frequency scaling, C-states and thermal management). Tested on my venerable Toshiba Portege R500. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Toshi Kani --- drivers/acpi/Makefile | 1 + drivers/acpi/acpi_processor.c | 484 +++++++++++++++++++ drivers/acpi/glue.c | 6 +- drivers/acpi/internal.h | 3 + drivers/acpi/processor_driver.c | 805 +++----------------------------- drivers/acpi/scan.c | 1 + drivers/base/cpu.c | 11 + include/acpi/processor.h | 5 + 8 files changed, 586 insertions(+), 730 deletions(-) create mode 100644 drivers/acpi/acpi_processor.c diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index ecb743bf05a5..93e49bde31ba 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -34,6 +34,7 @@ acpi-$(CONFIG_ACPI_SLEEP) += proc.o acpi-y += bus.o glue.o acpi-y += scan.o acpi-y += resource.o +acpi-y += acpi_processor.o acpi-y += processor_core.o acpi-y += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c new file mode 100644 index 000000000000..587d2af4b323 --- /dev/null +++ b/drivers/acpi/acpi_processor.c @@ -0,0 +1,484 @@ +/* + * acpi_processor.c - ACPI processor enumeration support + * + * Copyright (C) 2001, 2002 Andy Grover + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2004 Dominik Brodowski + * Copyright (C) 2004 Anil S Keshavamurthy + * Copyright (C) 2013, Intel Corporation + * Rafael J. Wysocki + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#include + +#include "internal.h" + +#define _COMPONENT ACPI_PROCESSOR_COMPONENT + +ACPI_MODULE_NAME("processor"); + +/* -------------------------------------------------------------------------- + Errata Handling + -------------------------------------------------------------------------- */ + +struct acpi_processor_errata errata __read_mostly; +EXPORT_SYMBOL_GPL(errata); + +static int acpi_processor_errata_piix4(struct pci_dev *dev) +{ + u8 value1 = 0; + u8 value2 = 0; + + + if (!dev) + return -EINVAL; + + /* + * Note that 'dev' references the PIIX4 ACPI Controller. + */ + + switch (dev->revision) { + case 0: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 A-step\n")); + break; + case 1: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 B-step\n")); + break; + case 2: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4E\n")); + break; + case 3: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4M\n")); + break; + default: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found unknown PIIX4\n")); + break; + } + + switch (dev->revision) { + + case 0: /* PIIX4 A-step */ + case 1: /* PIIX4 B-step */ + /* + * See specification changes #13 ("Manual Throttle Duty Cycle") + * and #14 ("Enabling and Disabling Manual Throttle"), plus + * erratum #5 ("STPCLK# Deassertion Time") from the January + * 2002 PIIX4 specification update. Applies to only older + * PIIX4 models. + */ + errata.piix4.throttle = 1; + + case 2: /* PIIX4E */ + case 3: /* PIIX4M */ + /* + * See erratum #18 ("C3 Power State/BMIDE and Type-F DMA + * Livelock") from the January 2002 PIIX4 specification update. + * Applies to all PIIX4 models. + */ + + /* + * BM-IDE + * ------ + * Find the PIIX4 IDE Controller and get the Bus Master IDE + * Status register address. We'll use this later to read + * each IDE controller's DMA status to make sure we catch all + * DMA activity. 
+ */ + dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB, + PCI_ANY_ID, PCI_ANY_ID, NULL); + if (dev) { + errata.piix4.bmisx = pci_resource_start(dev, 4); + pci_dev_put(dev); + } + + /* + * Type-F DMA + * ---------- + * Find the PIIX4 ISA Controller and read the Motherboard + * DMA controller's status to see if Type-F (Fast) DMA mode + * is enabled (bit 7) on either channel. Note that we'll + * disable C3 support if this is enabled, as some legacy + * devices won't operate well if fast DMA is disabled. + */ + dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_0, + PCI_ANY_ID, PCI_ANY_ID, NULL); + if (dev) { + pci_read_config_byte(dev, 0x76, &value1); + pci_read_config_byte(dev, 0x77, &value2); + if ((value1 & 0x80) || (value2 & 0x80)) + errata.piix4.fdma = 1; + pci_dev_put(dev); + } + + break; + } + + if (errata.piix4.bmisx) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Bus master activity detection (BM-IDE) erratum enabled\n")); + if (errata.piix4.fdma) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Type-F DMA livelock erratum (C3 disabled)\n")); + + return 0; +} + +static int acpi_processor_errata(struct acpi_processor *pr) +{ + int result = 0; + struct pci_dev *dev = NULL; + + + if (!pr) + return -EINVAL; + + /* + * PIIX4 + */ + dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, PCI_ANY_ID, + PCI_ANY_ID, NULL); + if (dev) { + result = acpi_processor_errata_piix4(dev); + pci_dev_put(dev); + } + + return result; +} + +/* -------------------------------------------------------------------------- + Initialization + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +static int acpi_processor_hotadd_init(struct acpi_processor *pr) +{ + unsigned long long sta; + acpi_status status; + int ret; + + status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_PRESENT)) + return -ENODEV; + + ret = 
acpi_map_lsapic(pr->handle, &pr->id); + if (ret) + return ret; + + ret = arch_register_cpu(pr->id); + if (ret) { + acpi_unmap_lsapic(pr->id); + return ret; + } + + /* + * CPU got hot-added, but cpu_data is not initialized yet. Set a flag + * to delay cpu_idle/throttling initialization and do it when the CPU + * gets online for the first time. + */ + pr_info("CPU%d has been hot-added\n", pr->id); + pr->flags.need_hotplug_init = 1; + return 0; +} +#else +static inline int acpi_processor_hotadd_init(struct acpi_processor *pr) +{ + return -ENODEV; +} +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +static int acpi_processor_get_info(struct acpi_device *device) +{ + union acpi_object object = { 0 }; + struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; + struct acpi_processor *pr = acpi_driver_data(device); + int cpu_index, device_declaration = 0; + acpi_status status = AE_OK; + static int cpu0_initialized; + + if (num_online_cpus() > 1) + errata.smp = TRUE; + + acpi_processor_errata(pr); + + /* + * Check to see if we have bus mastering arbitration control. This + * is required for proper C3 usage (to maintain cache coherency). + */ + if (acpi_gbl_FADT.pm2_control_block && acpi_gbl_FADT.pm2_control_length) { + pr->flags.bm_control = 1; + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Bus mastering arbitration control present\n")); + } else + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "No bus mastering arbitration control\n")); + + if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) { + /* Declared with "Processor" statement; match ProcessorID */ + status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) { + dev_err(&device->dev, + "Failed to evaluate processor object (0x%x)\n", + status); + return -ENODEV; + } + + /* + * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP. 
+ * >>> 'acpi_get_processor_id(acpi_id, &id)' in + * arch/xxx/acpi.c + */ + pr->acpi_id = object.processor.proc_id; + } else { + /* + * Declared with "Device" statement; match _UID. + * Note that we don't handle string _UIDs yet. + */ + unsigned long long value; + status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID, + NULL, &value); + if (ACPI_FAILURE(status)) { + dev_err(&device->dev, + "Failed to evaluate processor _UID (0x%x)\n", + status); + return -ENODEV; + } + device_declaration = 1; + pr->acpi_id = value; + } + cpu_index = acpi_get_cpuid(pr->handle, device_declaration, pr->acpi_id); + + /* Handle UP system running SMP kernel, with no LAPIC in MADT */ + if (!cpu0_initialized && (cpu_index == -1) && + (num_online_cpus() == 1)) { + cpu_index = 0; + } + + cpu0_initialized = 1; + + pr->id = cpu_index; + + /* + * Extra Processor objects may be enumerated on MP systems with + * less than the max # of CPUs. They should be ignored _iff + * they are physically not present. + */ + if (pr->id == -1) { + int ret = acpi_processor_hotadd_init(pr); + if (ret) + return ret; + } + /* + * On some boxes several processors use the same processor bus id. + * But they are located in different scope. For example: + * \_SB.SCK0.CPU0 + * \_SB.SCK1.CPU0 + * Rename the processor device bus id. And the new bus id will be + * generated as the following format: + * CPU+CPU ID. 
+ */ + sprintf(acpi_device_bid(device), "CPU%X", pr->id); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id, + pr->acpi_id)); + + if (!object.processor.pblk_address) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n")); + else if (object.processor.pblk_length != 6) + dev_err(&device->dev, "Invalid PBLK length [%d]\n", + object.processor.pblk_length); + else { + pr->throttling.address = object.processor.pblk_address; + pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset; + pr->throttling.duty_width = acpi_gbl_FADT.duty_width; + + pr->pblk = object.processor.pblk_address; + + /* + * We don't care about error returns - we just try to mark + * these reserved so that nobody else is confused into thinking + * that this region might be unused.. + * + * (In particular, allocating the IO range for Cardbus) + */ + request_region(pr->throttling.address, 6, "ACPI CPU throttle"); + } + + /* + * If ACPI describes a slot number for this CPU, we can use it to + * ensure we get the right value in the "physical id" field + * of /proc/cpuinfo + */ + status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer); + if (ACPI_SUCCESS(status)) + arch_fix_phys_package_id(pr->id, object.integer.value); + + return 0; +} + +/* + * Do not put anything in here which needs the core to be online. + * For example MSR access or setting up things which check for cpuinfo_x86 + * (cpu_data(cpu)) values, like CPU feature flags, family, model, etc. + * Such things have to be put in and set up by the processor driver's .probe(). 
+ */ +static DEFINE_PER_CPU(void *, processor_device_array); + +static int __cpuinit acpi_processor_add(struct acpi_device *device, + const struct acpi_device_id *id) +{ + struct acpi_processor *pr; + struct device *dev; + int result = 0; + + pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); + if (!pr) + return -ENOMEM; + + if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { + result = -ENOMEM; + goto err_free_pr; + } + + pr->handle = device->handle; + strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS); + device->driver_data = pr; + + result = acpi_processor_get_info(device); + if (result) /* Processor is not physically present or unavailable */ + return 0; + +#ifdef CONFIG_SMP + if (pr->id >= setup_max_cpus && pr->id != 0) + return 0; +#endif + + BUG_ON(pr->id >= nr_cpu_ids); + + /* + * Buggy BIOS check. + * ACPI id of processors can be reported wrongly by the BIOS. + * Don't trust it blindly + */ + if (per_cpu(processor_device_array, pr->id) != NULL && + per_cpu(processor_device_array, pr->id) != device) { + dev_warn(&device->dev, + "BIOS reported wrong ACPI id %d for the processor\n", + pr->id); + /* Give up, but do not abort the namespace scan. */ + goto err; + } + /* + * processor_device_array is not cleared on errors to allow buggy BIOS + * checks. + */ + per_cpu(processor_device_array, pr->id) = device; + + dev = get_cpu_device(pr->id); + ACPI_HANDLE_SET(dev, pr->handle); + result = acpi_bind_one(dev, NULL); + if (result) + goto err; + + pr->dev = dev; + dev->offline = pr->flags.need_hotplug_init; + + /* Trigger the processor driver's .probe() if present. 
*/ + if (device_attach(dev) >= 0) + return 1; + + dev_err(dev, "Processor driver could not be attached\n"); + acpi_unbind_one(dev); + + err: + free_cpumask_var(pr->throttling.shared_cpu_map); + device->driver_data = NULL; + err_free_pr: + kfree(pr); + return result; +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +/* -------------------------------------------------------------------------- + Removal + -------------------------------------------------------------------------- */ + +static void acpi_processor_remove(struct acpi_device *device) +{ + struct acpi_processor *pr; + + if (!device || !acpi_driver_data(device)) + return; + + pr = acpi_driver_data(device); + if (pr->id >= nr_cpu_ids) + goto out; + + /* + * The only reason why we ever get here is CPU hot-removal. The CPU is + * already offline and the ACPI device removal locking prevents it from + * being put back online at this point. + * + * Unbind the driver from the processor device and detach it from the + * ACPI companion object. + */ + device_release_driver(pr->dev); + acpi_unbind_one(pr->dev); + + /* Clean up. */ + per_cpu(processor_device_array, pr->id) = NULL; + try_offline_node(cpu_to_node(pr->id)); + + /* Remove the CPU. */ + get_online_cpus(); + arch_unregister_cpu(pr->id); + acpi_unmap_lsapic(pr->id); + put_online_cpus(); + + out: + free_cpumask_var(pr->throttling.shared_cpu_map); + kfree(pr); +} +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +/* + * The following ACPI IDs are known to be suitable for representing as + * processor devices. 
+ */ +static const struct acpi_device_id processor_device_ids[] = { + + { ACPI_PROCESSOR_OBJECT_HID, }, + { ACPI_PROCESSOR_DEVICE_HID, }, + + { } +}; + +static struct acpi_scan_handler __refdata processor_handler = { + .ids = processor_device_ids, + .attach = acpi_processor_add, +#ifdef CONFIG_ACPI_HOTPLUG_CPU + .detach = acpi_processor_remove, +#endif + .hotplug = { + .enabled = true, + }, +}; + +void __init acpi_processor_init(void) +{ + acpi_scan_add_handler_with_hotplug(&processor_handler, "processor"); +} diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 40a84cc6740c..9783f400d857 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -105,7 +105,7 @@ acpi_handle acpi_get_child(acpi_handle parent, u64 address) } EXPORT_SYMBOL(acpi_get_child); -static int acpi_bind_one(struct device *dev, acpi_handle handle) +int acpi_bind_one(struct device *dev, acpi_handle handle) { struct acpi_device *acpi_dev; acpi_status status; @@ -188,8 +188,9 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) kfree(physical_node); goto err; } +EXPORT_SYMBOL_GPL(acpi_bind_one); -static int acpi_unbind_one(struct device *dev) +int acpi_unbind_one(struct device *dev) { struct acpi_device_physical_node *entry; struct acpi_device *acpi_dev; @@ -238,6 +239,7 @@ err: dev_err(dev, "Oops, 'acpi_handle' corrupt\n"); return -EINVAL; } +EXPORT_SYMBOL_GPL(acpi_unbind_one); static int acpi_platform_notify(struct device *dev) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4548f0a114ce..bf792595132c 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -33,6 +33,7 @@ static inline void acpi_pci_slot_init(void) { } void acpi_pci_root_init(void); void acpi_pci_link_init(void); void acpi_pci_root_hp_init(void); +void acpi_processor_init(void); void acpi_platform_init(void); int acpi_sysfs_init(void); void acpi_csrt_init(void); @@ -79,6 +80,8 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle, int type, 
unsigned long long sta); void acpi_device_add_finalize(struct acpi_device *device); void acpi_free_pnp_ids(struct acpi_device_pnp *pnp); +int acpi_bind_one(struct device *dev, acpi_handle handle); +int acpi_unbind_one(struct device *dev); /* -------------------------------------------------------------------------- Power Resource diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index c266cdc11784..ac28f18823b3 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -1,11 +1,13 @@ /* - * acpi_processor.c - ACPI Processor Driver ($Revision: 71 $) + * processor_driver.c - ACPI Processor Driver * * Copyright (C) 2001, 2002 Andy Grover * Copyright (C) 2001, 2002 Paul Diefenbaugh * Copyright (C) 2004 Dominik Brodowski * Copyright (C) 2004 Anil S Keshavamurthy * - Added processor hotplug support + * Copyright (C) 2013, Intel Corporation + * Rafael J. Wysocki * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * @@ -24,52 +26,29 @@ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * TBD: - * 1. Make # power states dynamic. - * 2. Support duty_cycle values that span bit 4. - * 3. Optimize by having scheduler determine business instead of - * having us try to calculate it here. - * 4. Need C1 timing -- must modify kernel (IRQ handler) to get this. 
*/ #include #include #include -#include -#include -#include #include #include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include #include +#include "internal.h" + #define PREFIX "ACPI: " -#define ACPI_PROCESSOR_CLASS "processor" -#define ACPI_PROCESSOR_DEVICE_NAME "Processor" #define ACPI_PROCESSOR_FILE_INFO "info" #define ACPI_PROCESSOR_FILE_THROTTLING "throttling" #define ACPI_PROCESSOR_FILE_LIMIT "limit" #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 -#define ACPI_PROCESSOR_DEVICE_HID "ACPI0007" #define ACPI_PROCESSOR_LIMIT_USER 0 #define ACPI_PROCESSOR_LIMIT_THERMAL 1 @@ -81,12 +60,8 @@ MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Processor Driver"); MODULE_LICENSE("GPL"); -static int acpi_processor_add(struct acpi_device *device); -static int acpi_processor_remove(struct acpi_device *device); -static void acpi_processor_notify(struct acpi_device *device, u32 event); -static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr); -static int acpi_processor_handle_eject(struct acpi_processor *pr); -static int acpi_processor_start(struct acpi_processor *pr); +static int acpi_processor_start(struct device *dev); +static int acpi_processor_stop(struct device *dev); static const struct acpi_device_id processor_device_ids[] = { {ACPI_PROCESSOR_OBJECT_HID, 0}, @@ -95,295 +70,27 @@ static const struct acpi_device_id processor_device_ids[] = { }; MODULE_DEVICE_TABLE(acpi, processor_device_ids); -static struct acpi_driver acpi_processor_driver = { +static struct device_driver acpi_processor_driver = { .name = "processor", - .class = ACPI_PROCESSOR_CLASS, - .ids = processor_device_ids, - .ops = { - .add = acpi_processor_add, - .remove = acpi_processor_remove, - .notify = acpi_processor_notify, - }, + .bus = &cpu_subsys, + .acpi_match_table = processor_device_ids, + 
.probe = acpi_processor_start, + .remove = acpi_processor_stop, }; -#define INSTALL_NOTIFY_HANDLER 1 -#define UNINSTALL_NOTIFY_HANDLER 2 - DEFINE_PER_CPU(struct acpi_processor *, processors); EXPORT_PER_CPU_SYMBOL(processors); -struct acpi_processor_errata errata __read_mostly; - -/* -------------------------------------------------------------------------- - Errata Handling - -------------------------------------------------------------------------- */ - -static int acpi_processor_errata_piix4(struct pci_dev *dev) +static void acpi_processor_notify(acpi_handle handle, u32 event, void *data) { - u8 value1 = 0; - u8 value2 = 0; - - - if (!dev) - return -EINVAL; - - /* - * Note that 'dev' references the PIIX4 ACPI Controller. - */ - - switch (dev->revision) { - case 0: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 A-step\n")); - break; - case 1: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4 B-step\n")); - break; - case 2: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4E\n")); - break; - case 3: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found PIIX4M\n")); - break; - default: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found unknown PIIX4\n")); - break; - } - - switch (dev->revision) { - - case 0: /* PIIX4 A-step */ - case 1: /* PIIX4 B-step */ - /* - * See specification changes #13 ("Manual Throttle Duty Cycle") - * and #14 ("Enabling and Disabling Manual Throttle"), plus - * erratum #5 ("STPCLK# Deassertion Time") from the January - * 2002 PIIX4 specification update. Applies to only older - * PIIX4 models. - */ - errata.piix4.throttle = 1; - - case 2: /* PIIX4E */ - case 3: /* PIIX4M */ - /* - * See erratum #18 ("C3 Power State/BMIDE and Type-F DMA - * Livelock") from the January 2002 PIIX4 specification update. - * Applies to all PIIX4 models. - */ - - /* - * BM-IDE - * ------ - * Find the PIIX4 IDE Controller and get the Bus Master IDE - * Status register address. We'll use this later to read - * each IDE controller's DMA status to make sure we catch all - * DMA activity. 
- */ - dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82371AB, - PCI_ANY_ID, PCI_ANY_ID, NULL); - if (dev) { - errata.piix4.bmisx = pci_resource_start(dev, 4); - pci_dev_put(dev); - } - - /* - * Type-F DMA - * ---------- - * Find the PIIX4 ISA Controller and read the Motherboard - * DMA controller's status to see if Type-F (Fast) DMA mode - * is enabled (bit 7) on either channel. Note that we'll - * disable C3 support if this is enabled, as some legacy - * devices won't operate well if fast DMA is disabled. - */ - dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82371AB_0, - PCI_ANY_ID, PCI_ANY_ID, NULL); - if (dev) { - pci_read_config_byte(dev, 0x76, &value1); - pci_read_config_byte(dev, 0x77, &value2); - if ((value1 & 0x80) || (value2 & 0x80)) - errata.piix4.fdma = 1; - pci_dev_put(dev); - } - - break; - } - - if (errata.piix4.bmisx) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Bus master activity detection (BM-IDE) erratum enabled\n")); - if (errata.piix4.fdma) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Type-F DMA livelock erratum (C3 disabled)\n")); - - return 0; -} - -static int acpi_processor_errata(struct acpi_processor *pr) -{ - int result = 0; - struct pci_dev *dev = NULL; - - - if (!pr) - return -EINVAL; - - /* - * PIIX4 - */ - dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82371AB_3, PCI_ANY_ID, - PCI_ANY_ID, NULL); - if (dev) { - result = acpi_processor_errata_piix4(dev); - pci_dev_put(dev); - } - - return result; -} - -/* -------------------------------------------------------------------------- - Driver Interface - -------------------------------------------------------------------------- */ - -static int acpi_processor_get_info(struct acpi_device *device) -{ - acpi_status status = 0; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; + struct acpi_device *device = data; struct acpi_processor *pr; - int cpu_index, device_declaration = 0; - static int 
cpu0_initialized; - - pr = acpi_driver_data(device); - if (!pr) - return -EINVAL; - - if (num_online_cpus() > 1) - errata.smp = TRUE; - - acpi_processor_errata(pr); - - /* - * Check to see if we have bus mastering arbitration control. This - * is required for proper C3 usage (to maintain cache coherency). - */ - if (acpi_gbl_FADT.pm2_control_block && acpi_gbl_FADT.pm2_control_length) { - pr->flags.bm_control = 1; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Bus mastering arbitration control present\n")); - } else - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "No bus mastering arbitration control\n")); - - if (!strcmp(acpi_device_hid(device), ACPI_PROCESSOR_OBJECT_HID)) { - /* Declared with "Processor" statement; match ProcessorID */ - status = acpi_evaluate_object(pr->handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) { - dev_err(&device->dev, - "Failed to evaluate processor object (0x%x)\n", - status); - return -ENODEV; - } - - /* - * TBD: Synch processor ID (via LAPIC/LSAPIC structures) on SMP. - * >>> 'acpi_get_processor_id(acpi_id, &id)' in - * arch/xxx/acpi.c - */ - pr->acpi_id = object.processor.proc_id; - } else { - /* - * Declared with "Device" statement; match _UID. - * Note that we don't handle string _UIDs yet. - */ - unsigned long long value; - status = acpi_evaluate_integer(pr->handle, METHOD_NAME__UID, - NULL, &value); - if (ACPI_FAILURE(status)) { - dev_err(&device->dev, - "Failed to evaluate processor _UID (0x%x)\n", - status); - return -ENODEV; - } - device_declaration = 1; - pr->acpi_id = value; - } - cpu_index = acpi_get_cpuid(pr->handle, device_declaration, pr->acpi_id); - - /* Handle UP system running SMP kernel, with no LAPIC in MADT */ - if (!cpu0_initialized && (cpu_index == -1) && - (num_online_cpus() == 1)) { - cpu_index = 0; - } - - cpu0_initialized = 1; - - pr->id = cpu_index; - - /* - * Extra Processor objects may be enumerated on MP systems with - * less than the max # of CPUs. They should be ignored _iff - * they are physically not present. 
- */ - if (pr->id == -1) { - if (ACPI_FAILURE(acpi_processor_hotadd_init(pr))) - return -ENODEV; - } - /* - * On some boxes several processors use the same processor bus id. - * But they are located in different scope. For example: - * \_SB.SCK0.CPU0 - * \_SB.SCK1.CPU0 - * Rename the processor device bus id. And the new bus id will be - * generated as the following format: - * CPU+CPU ID. - */ - sprintf(acpi_device_bid(device), "CPU%X", pr->id); - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id, - pr->acpi_id)); - - if (!object.processor.pblk_address) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No PBLK (NULL address)\n")); - else if (object.processor.pblk_length != 6) - dev_err(&device->dev, "Invalid PBLK length [%d]\n", - object.processor.pblk_length); - else { - pr->throttling.address = object.processor.pblk_address; - pr->throttling.duty_offset = acpi_gbl_FADT.duty_offset; - pr->throttling.duty_width = acpi_gbl_FADT.duty_width; - - pr->pblk = object.processor.pblk_address; - - /* - * We don't care about error returns - we just try to mark - * these reserved so that nobody else is confused into thinking - * that this region might be unused.. 
- * - * (In particular, allocating the IO range for Cardbus) - */ - request_region(pr->throttling.address, 6, "ACPI CPU throttle"); - } - - /* - * If ACPI describes a slot number for this CPU, we can use it - * ensure we get the right value in the "physical id" field - * of /proc/cpuinfo - */ - status = acpi_evaluate_object(pr->handle, "_SUN", NULL, &buffer); - if (ACPI_SUCCESS(status)) - arch_fix_phys_package_id(pr->id, object.integer.value); - - return 0; -} - -static DEFINE_PER_CPU(void *, processor_device_array); - -static void acpi_processor_notify(struct acpi_device *device, u32 event) -{ - struct acpi_processor *pr = acpi_driver_data(device); int saved; + if (device->handle != handle) + return; + + pr = acpi_driver_data(device); if (!pr) return; @@ -420,55 +127,62 @@ static void acpi_processor_notify(struct acpi_device *device, u32 event) return; } -static int acpi_cpu_soft_notify(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static __cpuinit int __acpi_processor_start(struct acpi_device *device); + +static int __cpuinit acpi_cpu_soft_notify(struct notifier_block *nfb, + unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; struct acpi_processor *pr = per_cpu(processors, cpu); + struct acpi_device *device; - if (action == CPU_ONLINE && pr) { - /* CPU got physically hotplugged and onlined the first time: - * Initialize missing things + if (!pr || acpi_bus_get_device(pr->handle, &device)) + return NOTIFY_DONE; + + if (action == CPU_ONLINE) { + /* + * CPU got physically hotplugged and onlined for the first time: + * Initialize missing things. 
*/ if (pr->flags.need_hotplug_init) { + int ret; + pr_info("Will online and init hotplugged CPU: %d\n", pr->id); - WARN(acpi_processor_start(pr), "Failed to start CPU:" - " %d\n", pr->id); pr->flags.need_hotplug_init = 0; - /* Normal CPU soft online event */ + ret = __acpi_processor_start(device); + WARN(ret, "Failed to start CPU: %d\n", pr->id); } else { + /* Normal CPU soft online event. */ acpi_processor_ppc_has_changed(pr, 0); acpi_processor_hotplug(pr); acpi_processor_reevaluate_tstate(pr, action); acpi_processor_tstate_has_changed(pr); } - } - if (action == CPU_DEAD && pr) { - /* invalidate the flag.throttling after one CPU is offline */ + } else if (action == CPU_DEAD) { + /* Invalidate flag.throttling after the CPU is offline. */ acpi_processor_reevaluate_tstate(pr, action); } return NOTIFY_OK; } -static struct notifier_block acpi_cpu_notifier = +static struct notifier_block __refdata acpi_cpu_notifier = { .notifier_call = acpi_cpu_soft_notify, }; -/* - * acpi_processor_start() is called by the cpu_hotplug_notifier func: - * acpi_cpu_soft_notify(). Getting it __cpuinit{data} is difficult, the - * root cause seem to be that acpi_processor_uninstall_hotplug_notify() - * is in the module_exit (__exit) func. Allowing acpi_processor_start() - * to not be in __cpuinit section, but being called from __cpuinit funcs - * via __ref looks like the right thing to do here. 
- */ -static __ref int acpi_processor_start(struct acpi_processor *pr) +static __cpuinit int __acpi_processor_start(struct acpi_device *device) { - struct acpi_device *device = per_cpu(processor_device_array, pr->id); + struct acpi_processor *pr = acpi_driver_data(device); + acpi_status status; int result = 0; + if (!pr) + return -ENODEV; + + if (pr->flags.need_hotplug_init) + return 0; + #ifdef CONFIG_CPU_FREQ acpi_processor_ppc_has_changed(pr, 0); acpi_processor_load_module(pr); @@ -506,129 +220,48 @@ static __ref int acpi_processor_start(struct acpi_processor *pr) goto err_remove_sysfs_thermal; } - return 0; + status = acpi_install_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_processor_notify, device); + if (ACPI_SUCCESS(status)) + return 0; -err_remove_sysfs_thermal: + sysfs_remove_link(&pr->cdev->device.kobj, "device"); + err_remove_sysfs_thermal: sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); -err_thermal_unregister: + err_thermal_unregister: thermal_cooling_device_unregister(pr->cdev); -err_power_exit: + err_power_exit: acpi_processor_power_exit(pr); - return result; } -/* - * Do not put anything in here which needs the core to be online. - * For example MSR access or setting up things which check for cpuinfo_x86 - * (cpu_data(cpu)) values, like CPU feature flags, family, model, etc. 
- * Such things have to be put in and set up above in acpi_processor_start() - */ -static int __cpuinit acpi_processor_add(struct acpi_device *device) +static int __cpuinit acpi_processor_start(struct device *dev) { - struct acpi_processor *pr = NULL; - int result = 0; - struct device *dev; + struct acpi_device *device; - pr = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); - if (!pr) - return -ENOMEM; + if (acpi_bus_get_device(ACPI_HANDLE(dev), &device)) + return -ENODEV; - if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { - result = -ENOMEM; - goto err_free_pr; - } - - pr->handle = device->handle; - strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME); - strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS); - device->driver_data = pr; - - result = acpi_processor_get_info(device); - if (result) { - /* Processor is physically not present */ - return 0; - } - -#ifdef CONFIG_SMP - if (pr->id >= setup_max_cpus && pr->id != 0) - return 0; -#endif - - BUG_ON(pr->id >= nr_cpu_ids); - - /* - * Buggy BIOS check - * ACPI id of processors can be reported wrongly by the BIOS. 
- * Don't trust it blindly - */ - if (per_cpu(processor_device_array, pr->id) != NULL && - per_cpu(processor_device_array, pr->id) != device) { - dev_warn(&device->dev, - "BIOS reported wrong ACPI id %d for the processor\n", - pr->id); - result = -ENODEV; - goto err_free_cpumask; - } - per_cpu(processor_device_array, pr->id) = device; - - per_cpu(processors, pr->id) = pr; - - dev = get_cpu_device(pr->id); - if (sysfs_create_link(&device->dev.kobj, &dev->kobj, "sysdev")) { - result = -EFAULT; - goto err_clear_processor; - } - - /* - * Do not start hotplugged CPUs now, but when they - * are onlined the first time - */ - if (pr->flags.need_hotplug_init) - return 0; - - result = acpi_processor_start(pr); - if (result) - goto err_remove_sysfs; - - return 0; - -err_remove_sysfs: - sysfs_remove_link(&device->dev.kobj, "sysdev"); -err_clear_processor: - /* - * processor_device_array is not cleared to allow checks for buggy BIOS - */ - per_cpu(processors, pr->id) = NULL; -err_free_cpumask: - free_cpumask_var(pr->throttling.shared_cpu_map); -err_free_pr: - kfree(pr); - return result; + return __acpi_processor_start(device); } -static int acpi_processor_remove(struct acpi_device *device) +static int acpi_processor_stop(struct device *dev) { - struct acpi_processor *pr = NULL; + struct acpi_device *device; + struct acpi_processor *pr; + if (acpi_bus_get_device(ACPI_HANDLE(dev), &device)) + return 0; - if (!device || !acpi_driver_data(device)) - return -EINVAL; + acpi_remove_notify_handler(device->handle, ACPI_DEVICE_NOTIFY, + acpi_processor_notify); pr = acpi_driver_data(device); - - if (pr->id >= nr_cpu_ids) - goto free; - - if (device->removal_type == ACPI_BUS_REMOVAL_EJECT) { - if (acpi_processor_handle_eject(pr)) - return -EINVAL; - } + if (!pr) + return 0; acpi_processor_power_exit(pr); - sysfs_remove_link(&device->dev.kobj, "sysdev"); - if (pr->cdev) { sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); sysfs_remove_link(&pr->cdev->device.kobj, "device"); @@ 
-637,331 +270,47 @@ static int acpi_processor_remove(struct acpi_device *device) } per_cpu(processors, pr->id) = NULL; - per_cpu(processor_device_array, pr->id) = NULL; - try_offline_node(cpu_to_node(pr->id)); - -free: - free_cpumask_var(pr->throttling.shared_cpu_map); - kfree(pr); - return 0; } -#ifdef CONFIG_ACPI_HOTPLUG_CPU -/**************************************************************************** - * Acpi processor hotplug support * - ****************************************************************************/ - -static int is_processor_present(acpi_handle handle) -{ - acpi_status status; - unsigned long long sta = 0; - - - status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - - if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_PRESENT)) - return 1; - - /* - * _STA is mandatory for a processor that supports hot plug - */ - if (status == AE_NOT_FOUND) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Processor does not support hot plug\n")); - else - ACPI_EXCEPTION((AE_INFO, status, - "Processor Device is not present")); - return 0; -} - -static void acpi_processor_hotplug_notify(acpi_handle handle, - u32 event, void *data) -{ - struct acpi_device *device = NULL; - struct acpi_eject_event *ej_event = NULL; - u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE; /* default */ - acpi_status status; - int result; - - acpi_scan_lock_acquire(); - - switch (event) { - case ACPI_NOTIFY_BUS_CHECK: - case ACPI_NOTIFY_DEVICE_CHECK: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Processor driver received %s event\n", - (event == ACPI_NOTIFY_BUS_CHECK) ? 
- "ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK")); - - if (!is_processor_present(handle)) - break; - - if (!acpi_bus_get_device(handle, &device)) - break; - - result = acpi_bus_scan(handle); - if (result) { - acpi_handle_err(handle, "Unable to add the device\n"); - break; - } - result = acpi_bus_get_device(handle, &device); - if (result) { - acpi_handle_err(handle, "Missing device object\n"); - break; - } - ost_code = ACPI_OST_SC_SUCCESS; - break; - - case ACPI_NOTIFY_EJECT_REQUEST: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "received ACPI_NOTIFY_EJECT_REQUEST\n")); - - if (acpi_bus_get_device(handle, &device)) { - acpi_handle_err(handle, - "Device don't exist, dropping EJECT\n"); - break; - } - if (!acpi_driver_data(device)) { - acpi_handle_err(handle, - "Driver data is NULL, dropping EJECT\n"); - break; - } - - ej_event = kmalloc(sizeof(*ej_event), GFP_KERNEL); - if (!ej_event) { - acpi_handle_err(handle, "No memory, dropping EJECT\n"); - break; - } - - get_device(&device->dev); - ej_event->device = device; - ej_event->event = ACPI_NOTIFY_EJECT_REQUEST; - /* The eject is carried out asynchronously. 
*/ - status = acpi_os_hotplug_execute(acpi_bus_hot_remove_device, - ej_event); - if (ACPI_FAILURE(status)) { - put_device(&device->dev); - kfree(ej_event); - break; - } - goto out; - - default: - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Unsupported event [0x%x]\n", event)); - - /* non-hotplug event; possibly handled by other handler */ - goto out; - } - - /* Inform firmware that the hotplug operation has completed */ - (void) acpi_evaluate_hotplug_ost(handle, event, ost_code, NULL); - - out: - acpi_scan_lock_release(); -} - -static acpi_status is_processor_device(acpi_handle handle) -{ - struct acpi_device_info *info; - char *hid; - acpi_status status; - - status = acpi_get_object_info(handle, &info); - if (ACPI_FAILURE(status)) - return status; - - if (info->type == ACPI_TYPE_PROCESSOR) { - kfree(info); - return AE_OK; /* found a processor object */ - } - - if (!(info->valid & ACPI_VALID_HID)) { - kfree(info); - return AE_ERROR; - } - - hid = info->hardware_id.string; - if ((hid == NULL) || strcmp(hid, ACPI_PROCESSOR_DEVICE_HID)) { - kfree(info); - return AE_ERROR; - } - - kfree(info); - return AE_OK; /* found a processor device object */ -} - -static acpi_status -processor_walk_namespace_cb(acpi_handle handle, - u32 lvl, void *context, void **rv) -{ - acpi_status status; - int *action = context; - - status = is_processor_device(handle); - if (ACPI_FAILURE(status)) - return AE_OK; /* not a processor; continue to walk */ - - switch (*action) { - case INSTALL_NOTIFY_HANDLER: - acpi_install_notify_handler(handle, - ACPI_SYSTEM_NOTIFY, - acpi_processor_hotplug_notify, - NULL); - break; - case UNINSTALL_NOTIFY_HANDLER: - acpi_remove_notify_handler(handle, - ACPI_SYSTEM_NOTIFY, - acpi_processor_hotplug_notify); - break; - default: - break; - } - - /* found a processor; skip walking underneath */ - return AE_CTRL_DEPTH; -} - -static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr) -{ - acpi_handle handle = pr->handle; - - if (!is_processor_present(handle)) { - 
return AE_ERROR; - } - - if (acpi_map_lsapic(handle, &pr->id)) - return AE_ERROR; - - if (arch_register_cpu(pr->id)) { - acpi_unmap_lsapic(pr->id); - return AE_ERROR; - } - - /* CPU got hot-plugged, but cpu_data is not initialized yet - * Set flag to delay cpu_idle/throttling initialization - * in: - * acpi_processor_add() - * acpi_processor_get_info() - * and do it when the CPU gets online the first time - * TBD: Cleanup above functions and try to do this more elegant. - */ - pr_info("CPU %d got hotplugged\n", pr->id); - pr->flags.need_hotplug_init = 1; - - return AE_OK; -} - -static int acpi_processor_handle_eject(struct acpi_processor *pr) -{ - if (cpu_online(pr->id)) - cpu_down(pr->id); - - get_online_cpus(); - /* - * The cpu might become online again at this point. So we check whether - * the cpu has been onlined or not. If the cpu became online, it means - * that someone wants to use the cpu. So acpi_processor_handle_eject() - * returns -EAGAIN. - */ - if (unlikely(cpu_online(pr->id))) { - put_online_cpus(); - pr_warn("Failed to remove CPU %d, because other task " - "brought the CPU back online\n", pr->id); - return -EAGAIN; - } - arch_unregister_cpu(pr->id); - acpi_unmap_lsapic(pr->id); - put_online_cpus(); - return (0); -} -#else -static acpi_status acpi_processor_hotadd_init(struct acpi_processor *pr) -{ - return AE_ERROR; -} -static int acpi_processor_handle_eject(struct acpi_processor *pr) -{ - return (-EINVAL); -} -#endif - -static -void acpi_processor_install_hotplug_notify(void) -{ -#ifdef CONFIG_ACPI_HOTPLUG_CPU - int action = INSTALL_NOTIFY_HANDLER; - acpi_walk_namespace(ACPI_TYPE_ANY, - ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, - processor_walk_namespace_cb, NULL, &action, NULL); -#endif - register_hotcpu_notifier(&acpi_cpu_notifier); -} - -static -void acpi_processor_uninstall_hotplug_notify(void) -{ -#ifdef CONFIG_ACPI_HOTPLUG_CPU - int action = UNINSTALL_NOTIFY_HANDLER; - acpi_walk_namespace(ACPI_TYPE_ANY, - ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, - 
processor_walk_namespace_cb, NULL, &action, NULL); -#endif - unregister_hotcpu_notifier(&acpi_cpu_notifier); -} - /* * We keep the driver loaded even when ACPI is not running. * This is needed for the powernow-k8 driver, that works even without * ACPI, but needs symbols from this driver */ -static int __init acpi_processor_init(void) +static int __init acpi_processor_driver_init(void) { int result = 0; if (acpi_disabled) return 0; - result = acpi_bus_register_driver(&acpi_processor_driver); + result = driver_register(&acpi_processor_driver); if (result < 0) return result; acpi_processor_syscore_init(); - - acpi_processor_install_hotplug_notify(); - + register_hotcpu_notifier(&acpi_cpu_notifier); acpi_thermal_cpufreq_init(); - acpi_processor_ppc_init(); - acpi_processor_throttling_init(); - return 0; } -static void __exit acpi_processor_exit(void) +static void __exit acpi_processor_driver_exit(void) { if (acpi_disabled) return; acpi_processor_ppc_exit(); - acpi_thermal_cpufreq_exit(); - - acpi_processor_uninstall_hotplug_notify(); - + unregister_hotcpu_notifier(&acpi_cpu_notifier); acpi_processor_syscore_exit(); - - acpi_bus_unregister_driver(&acpi_processor_driver); - - return; + driver_unregister(&acpi_processor_driver); } -module_init(acpi_processor_init); -module_exit(acpi_processor_exit); +module_init(acpi_processor_driver_init); +module_exit(acpi_processor_driver_exit); MODULE_ALIAS("processor"); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 4fd392005ef1..ad82bb2a37e0 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2124,6 +2124,7 @@ int __init acpi_scan_init(void) acpi_pci_root_init(); acpi_pci_link_init(); + acpi_processor_init(); acpi_platform_init(); acpi_lpss_init(); acpi_csrt_init(); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 25c8768172e9..7431ba6fc2d4 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -13,11 +13,21 @@ #include #include #include +#include #include "base.h" static DEFINE_PER_CPU(struct 
device *, cpu_sys_devices); +static int cpu_subsys_match(struct device *dev, struct device_driver *drv) +{ + /* ACPI style match is the only one that may succeed. */ + if (acpi_driver_match_device(dev, drv)) + return 1; + + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU static void change_cpu_under_node(struct cpu *cpu, unsigned int from_nid, unsigned int to_nid) @@ -98,6 +108,7 @@ static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); struct bus_type cpu_subsys = { .name = "cpu", .dev_name = "cpu", + .match = cpu_subsys_match, #ifdef CONFIG_HOTPLUG_CPU .online = cpu_subsys_online, .offline = cpu_subsys_offline, diff --git a/include/acpi/processor.h b/include/acpi/processor.h index ea69367fdd3b..66096d06925e 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -6,6 +6,10 @@ #include #include +#define ACPI_PROCESSOR_CLASS "processor" +#define ACPI_PROCESSOR_DEVICE_NAME "Processor" +#define ACPI_PROCESSOR_DEVICE_HID "ACPI0007" + #define ACPI_PROCESSOR_BUSY_METRIC 10 #define ACPI_PROCESSOR_MAX_POWER 8 @@ -207,6 +211,7 @@ struct acpi_processor { struct acpi_processor_throttling throttling; struct acpi_processor_limit limit; struct thermal_cooling_device *cdev; + struct device *dev; /* Processor device. */ }; struct acpi_processor_errata { From e2ff39400d81233374e780b133496a2296643d7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 8 May 2013 00:29:49 +0200 Subject: [PATCH 05/18] ACPI / memhotplug: Bind removable memory blocks to ACPI device nodes During ACPI memory hotplug configuration bind memory blocks residing in modules removable through the standard ACPI mechanism to struct acpi_device objects associated with ACPI namespace objects representing those modules. Accordingly, unbind those memory blocks from the struct acpi_device objects when the memory modules in question are being removed. 
When "offline" operation for devices representing memory blocks is introduced, this will allow the ACPI core's device hot-remove code to use it to carry out remove_memory() for those memory blocks and check the results of that before it actually removes the modules holding them from the system. Since walk_memory_range() is used for accessing all memory blocks corresponding to a given ACPI namespace object, it is exported from memory_hotplug.c so that the code in acpi_memhotplug.c can use it. Signed-off-by: Rafael J. Wysocki Tested-by: Vasilis Liaskovitis Reviewed-by: Toshi Kani --- drivers/acpi/acpi_memhotplug.c | 53 ++++++++++++++++++++++++++++++++-- include/linux/memory_hotplug.h | 2 ++ mm/memory_hotplug.c | 4 ++- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 5e6301e94920..5590db12028e 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -28,6 +28,7 @@ */ #include +#include #include #include "internal.h" @@ -166,13 +167,50 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device) return 0; } +static unsigned long acpi_meminfo_start_pfn(struct acpi_memory_info *info) +{ + return PFN_DOWN(info->start_addr); +} + +static unsigned long acpi_meminfo_end_pfn(struct acpi_memory_info *info) +{ + return PFN_UP(info->start_addr + info->length-1); +} + +static int acpi_bind_memblk(struct memory_block *mem, void *arg) +{ + return acpi_bind_one(&mem->dev, (acpi_handle)arg); +} + +static int acpi_bind_memory_blocks(struct acpi_memory_info *info, + acpi_handle handle) +{ + return walk_memory_range(acpi_meminfo_start_pfn(info), + acpi_meminfo_end_pfn(info), (void *)handle, + acpi_bind_memblk); +} + +static int acpi_unbind_memblk(struct memory_block *mem, void *arg) +{ + acpi_unbind_one(&mem->dev); + return 0; +} + +static void acpi_unbind_memory_blocks(struct acpi_memory_info *info, + acpi_handle handle) +{ + 
walk_memory_range(acpi_meminfo_start_pfn(info), + acpi_meminfo_end_pfn(info), NULL, acpi_unbind_memblk); +} + static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) { + acpi_handle handle = mem_device->device->handle; int result, num_enabled = 0; struct acpi_memory_info *info; int node; - node = acpi_get_node(mem_device->device->handle); + node = acpi_get_node(handle); /* * Tell the VM there is more memory here... * Note: Assume that this function returns zero on success @@ -203,6 +241,12 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) if (result && result != -EEXIST) continue; + result = acpi_bind_memory_blocks(info, handle); + if (result) { + acpi_unbind_memory_blocks(info, handle); + return -ENODEV; + } + info->enabled = 1; /* @@ -229,10 +273,11 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) { + acpi_handle handle = mem_device->device->handle; int result = 0, nid; struct acpi_memory_info *info, *n; - nid = acpi_get_node(mem_device->device->handle); + nid = acpi_get_node(handle); list_for_each_entry_safe(info, n, &mem_device->res_list, list) { if (!info->enabled) @@ -240,6 +285,8 @@ static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) if (nid < 0) nid = memory_add_physaddr_to_nid(info->start_addr); + + acpi_unbind_memory_blocks(info, handle); result = remove_memory(nid, info->start_addr, info->length); if (result) return result; @@ -300,7 +347,7 @@ static int acpi_memory_device_add(struct acpi_device *device, if (result) { dev_err(&device->dev, "acpi_memory_enable_device() error\n"); acpi_memory_device_free(mem_device); - return -ENODEV; + return result; } dev_dbg(&device->dev, "Memory device configured by ACPI\n"); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 3e622c610925..2975b7b2a9d8 100644 --- a/include/linux/memory_hotplug.h +++ 
b/include/linux/memory_hotplug.h @@ -245,6 +245,8 @@ static inline int is_mem_section_removable(unsigned long pfn, static inline void try_offline_node(int nid) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ +extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, + void *arg, int (*func)(struct memory_block *, void *)); extern int mem_online_node(int nid); extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a221fac1f47d..5ea1287ee91f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1618,6 +1618,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) { return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); } +#endif /* CONFIG_MEMORY_HOTREMOVE */ /** * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn) @@ -1631,7 +1632,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) * * Returns the return value of func. */ -static int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, +int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)) { struct memory_block *mem = NULL; @@ -1668,6 +1669,7 @@ static int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, return 0; } +#ifdef CONFIG_MEMORY_HOTREMOVE /** * offline_memory_block_cb - callback function for offlining memory block * @mem: the memory block to be offlined From 4960e05e22604ee270a023f968e0e4f9bd0c6fef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 8 May 2013 14:18:37 +0200 Subject: [PATCH 06/18] Driver core: Introduce offline/online callbacks for memory blocks Introduce .offline() and .online() callbacks for memory_subsys that will allow the generic device_offline() and device_online() to be used with device objects representing memory blocks. 
That, in turn, allows the ACPI subsystem to use device_offline() to put removable memory blocks offline, if possible, before removing memory modules holding them. The 'online' sysfs attribute of memory block devices will attempt to put them offline if 0 is written to it and will attempt to apply the previously used online type when onlining them (i.e. when 1 is written to it). Signed-off-by: Rafael J. Wysocki Tested-by: Vasilis Liaskovitis Acked-by: Greg Kroah-Hartman Reviewed-by: Toshi Kani --- drivers/base/memory.c | 112 ++++++++++++++++++++++++++++++++--------- include/linux/memory.h | 1 + 2 files changed, 88 insertions(+), 25 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 14f8a6954da0..c8f3b63fcacd 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -37,9 +37,14 @@ static inline int base_memory_block_id(int section_nr) return section_nr / sections_per_block; } +static int memory_subsys_online(struct device *dev); +static int memory_subsys_offline(struct device *dev); + static struct bus_type memory_subsys = { .name = MEMORY_CLASS_NAME, .dev_name = MEMORY_CLASS_NAME, + .online = memory_subsys_online, + .offline = memory_subsys_offline, }; static BLOCKING_NOTIFIER_HEAD(memory_chain); @@ -88,6 +93,7 @@ int register_memory(struct memory_block *memory) memory->dev.bus = &memory_subsys; memory->dev.id = memory->start_section_nr / sections_per_block; memory->dev.release = memory_block_release; + memory->dev.offline = memory->state == MEM_OFFLINE; error = device_register(&memory->dev); return error; @@ -278,33 +284,70 @@ static int __memory_block_change_state(struct memory_block *mem, { int ret = 0; - if (mem->state != from_state_req) { - ret = -EINVAL; - goto out; - } + if (mem->state != from_state_req) + return -EINVAL; if (to_state == MEM_OFFLINE) mem->state = MEM_GOING_OFFLINE; ret = memory_block_action(mem->start_section_nr, to_state, online_type); - if (ret) { mem->state = from_state_req; - goto out; + } else { + 
mem->state = to_state; + if (to_state == MEM_ONLINE) + mem->last_online = online_type; } + return ret; +} - mem->state = to_state; - switch (mem->state) { - case MEM_OFFLINE: - kobject_uevent(&mem->dev.kobj, KOBJ_OFFLINE); - break; - case MEM_ONLINE: - kobject_uevent(&mem->dev.kobj, KOBJ_ONLINE); - break; - default: - break; +static int memory_subsys_online(struct device *dev) +{ + struct memory_block *mem = container_of(dev, struct memory_block, dev); + int ret; + + mutex_lock(&mem->state_mutex); + + ret = mem->state == MEM_ONLINE ? 0 : + __memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE, + mem->last_online); + + mutex_unlock(&mem->state_mutex); + return ret; +} + +static int memory_subsys_offline(struct device *dev) +{ + struct memory_block *mem = container_of(dev, struct memory_block, dev); + int ret; + + mutex_lock(&mem->state_mutex); + + ret = mem->state == MEM_OFFLINE ? 0 : + __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE, -1); + + mutex_unlock(&mem->state_mutex); + return ret; +} + +static int __memory_block_change_state_uevent(struct memory_block *mem, + unsigned long to_state, unsigned long from_state_req, + int online_type) +{ + int ret = __memory_block_change_state(mem, to_state, from_state_req, + online_type); + if (!ret) { + switch (mem->state) { + case MEM_OFFLINE: + kobject_uevent(&mem->dev.kobj, KOBJ_OFFLINE); + break; + case MEM_ONLINE: + kobject_uevent(&mem->dev.kobj, KOBJ_ONLINE); + break; + default: + break; + } } -out: return ret; } @@ -315,8 +358,8 @@ static int memory_block_change_state(struct memory_block *mem, int ret; mutex_lock(&mem->state_mutex); - ret = __memory_block_change_state(mem, to_state, from_state_req, - online_type); + ret = __memory_block_change_state_uevent(mem, to_state, from_state_req, + online_type); mutex_unlock(&mem->state_mutex); return ret; @@ -326,22 +369,34 @@ store_mem_state(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct memory_block *mem; + bool offline; 
int ret = -EINVAL; mem = container_of(dev, struct memory_block, dev); - if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) + lock_device_hotplug(); + + if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) { + offline = false; ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE, ONLINE_KERNEL); - else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) + } else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) { + offline = false; ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE, ONLINE_MOVABLE); - else if (!strncmp(buf, "online", min_t(int, count, 6))) + } else if (!strncmp(buf, "online", min_t(int, count, 6))) { + offline = false; ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE, ONLINE_KEEP); - else if(!strncmp(buf, "offline", min_t(int, count, 7))) + } else if(!strncmp(buf, "offline", min_t(int, count, 7))) { + offline = true; ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE, -1); + } + if (!ret) + dev->offline = offline; + + unlock_device_hotplug(); if (ret) return ret; @@ -563,6 +618,7 @@ static int init_memory_block(struct memory_block **memory, base_memory_block_id(scn_nr) * sections_per_block; mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; mem->state = state; + mem->last_online = ONLINE_KEEP; mem->section_count++; mutex_init(&mem->state_mutex); start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -681,14 +737,20 @@ int unregister_memory_section(struct mem_section *section) /* * offline one memory block. If the memory block has been offlined, do nothing. + * + * Call under device_hotplug_lock. 
*/ int offline_memory_block(struct memory_block *mem) { int ret = 0; mutex_lock(&mem->state_mutex); - if (mem->state != MEM_OFFLINE) - ret = __memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE, -1); + if (mem->state != MEM_OFFLINE) { + ret = __memory_block_change_state_uevent(mem, MEM_OFFLINE, + MEM_ONLINE, -1); + if (!ret) + mem->dev.offline = true; + } mutex_unlock(&mem->state_mutex); return ret; diff --git a/include/linux/memory.h b/include/linux/memory.h index 85c31a8e2904..3d5346583022 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -26,6 +26,7 @@ struct memory_block { unsigned long start_section_nr; unsigned long end_section_nr; unsigned long state; + int last_online; int section_count; /* From 1001b4d4a8ee6b2e7a6078a02ccdf68f91b192bd Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 30 May 2013 00:30:05 +0200 Subject: [PATCH 07/18] CPU: Fix sysfs cpu/online of offlined CPUs As reported by Dave Hansen, sysfs cpu/online shows 1 for offlined CPUs at boot. Fix this problem by initializing dev.offline with cpu_online() when registering a CPU. References: https://lkml.org/lkml/2013/5/29/403 Reported-and-tested-by: Dave Hansen Signed-off-by: Toshi Kani Signed-off-by: Rafael J. Wysocki --- drivers/base/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 7431ba6fc2d4..1d110dc6f0c1 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -265,6 +265,7 @@ int __cpuinit register_cpu(struct cpu *cpu, int num) cpu->dev.bus = &cpu_subsys; cpu->dev.release = cpu_device_release; cpu->dev.offline_disabled = !cpu->hotpluggable; + cpu->dev.offline = !cpu_online(num); #ifdef CONFIG_ARCH_HAS_CPU_AUTOPROBE cpu->dev.bus->uevent = arch_cpu_uevent; #endif From 2e4f1db49d97222110b6add9a2c6cf5251a41e35 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 30 May 2013 21:55:46 +0200 Subject: [PATCH 08/18] ACPI / processor: Initialize per_cpu(processors, pr->id) properly Commit ac212b6 (ACPI / processor: Use common hotplug infrastructure) forgot about initializing the per-CPU 'processors' variables which led to ACPI cpuidle failure to use C-states and caused boot slowdown on multi-CPU machines. Fix the problem by adding per_cpu(processors, pr->id) initialization to acpi_processor_add() and make acpi_processor_remove() clean it up as appropriate. Also modify acpi_processor_stop() so that it doesn't clear per_cpu(processors, pr->id) on processor driver removal which would then cause problems to happen when the driver is loaded again. This version of the patch contains fixes from Yinghai Lu. Reported-and-tested-by: Yinghai Lu Reported-and-tested-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_processor.c | 6 ++++++ drivers/acpi/processor_driver.c | 5 ----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 587d2af4b323..cae2641e8d84 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -29,6 +29,9 @@ ACPI_MODULE_NAME("processor"); +DEFINE_PER_CPU(struct acpi_processor *, processors); +EXPORT_PER_CPU_SYMBOL(processors); + /* -------------------------------------------------------------------------- Errata Handling -------------------------------------------------------------------------- */ @@ -387,6 +390,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device, * checks. 
*/ per_cpu(processor_device_array, pr->id) = device; + per_cpu(processors, pr->id) = pr; dev = get_cpu_device(pr->id); ACPI_HANDLE_SET(dev, pr->handle); @@ -407,6 +411,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device, err: free_cpumask_var(pr->throttling.shared_cpu_map); device->driver_data = NULL; + per_cpu(processors, pr->id) = NULL; err_free_pr: kfree(pr); return result; @@ -441,6 +446,7 @@ static void acpi_processor_remove(struct acpi_device *device) /* Clean up. */ per_cpu(processor_device_array, pr->id) = NULL; + per_cpu(processors, pr->id) = NULL; try_offline_node(cpu_to_node(pr->id)); /* Remove the CPU. */ diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index ac28f18823b3..d93963f1e8f4 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -78,9 +78,6 @@ static struct device_driver acpi_processor_driver = { .remove = acpi_processor_stop, }; -DEFINE_PER_CPU(struct acpi_processor *, processors); -EXPORT_PER_CPU_SYMBOL(processors); - static void acpi_processor_notify(acpi_handle handle, u32 event, void *data) { struct acpi_device *device = data; @@ -268,8 +265,6 @@ static int acpi_processor_stop(struct device *dev) thermal_cooling_device_unregister(pr->cdev); pr->cdev = NULL; } - - per_cpu(processors, pr->id) = NULL; return 0; } From b2c064b25ad07169b2892a733918e6b941bf3366 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 May 2013 10:38:55 +0200 Subject: [PATCH 09/18] Driver core / memory: Simplify __memory_block_change_state() As noted by Tang Chen, the last_online field in struct memory_block introduced by commit 4960e05 (Driver core: Introduce offline/online callbacks for memory blocks) is not really necessary, because online_pages() restores the previous state if passed ONLINE_KEEP as the last argument. Therefore, remove that field along with the code referring to it. 
References: http://marc.info/?l=linux-kernel&m=136919777305599&w=2 Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Tang Chen --- drivers/base/memory.c | 11 ++--------- include/linux/memory.h | 1 - 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c8f3b63fcacd..c7092bc3c01e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -291,13 +291,7 @@ static int __memory_block_change_state(struct memory_block *mem, mem->state = MEM_GOING_OFFLINE; ret = memory_block_action(mem->start_section_nr, to_state, online_type); - if (ret) { - mem->state = from_state_req; - } else { - mem->state = to_state; - if (to_state == MEM_ONLINE) - mem->last_online = online_type; - } + mem->state = ret ? from_state_req : to_state; return ret; } @@ -310,7 +304,7 @@ static int memory_subsys_online(struct device *dev) ret = mem->state == MEM_ONLINE ? 0 : __memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE, - mem->last_online); + ONLINE_KEEP); mutex_unlock(&mem->state_mutex); return ret; @@ -618,7 +612,6 @@ static int init_memory_block(struct memory_block **memory, base_memory_block_id(scn_nr) * sections_per_block; mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; mem->state = state; - mem->last_online = ONLINE_KEEP; mem->section_count++; mutex_init(&mem->state_mutex); start_pfn = section_nr_to_pfn(mem->start_section_nr); diff --git a/include/linux/memory.h b/include/linux/memory.h index 3d5346583022..85c31a8e2904 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -26,7 +26,6 @@ struct memory_block { unsigned long start_section_nr; unsigned long end_section_nr; unsigned long state; - int last_online; int section_count; /* From 2e199192df85eb936a7829dc28b57b85c59c86fc Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 23 May 2013 10:40:35 +0200 Subject: [PATCH 10/18] ACPI: Drop removal_type field from struct acpi_device The ACPI processor driver was the only user of the removal_type field in struct acpi_device, but it doesn't use that field any more after recent changes. Thus, removal_type has no more users, so drop it along with the associated data type. Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani --- drivers/acpi/scan.c | 2 -- include/acpi/acpi_bus.h | 8 -------- 2 files changed, 10 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ad82bb2a37e0..ba8ee6cbf0f1 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1036,7 +1036,6 @@ int acpi_device_add(struct acpi_device *device, printk(KERN_ERR PREFIX "Error creating sysfs interface for device %s\n", dev_name(&device->dev)); - device->removal_type = ACPI_BUS_REMOVAL_NORMAL; return 0; err: @@ -2025,7 +2024,6 @@ static acpi_status acpi_bus_device_detach(acpi_handle handle, u32 lvl_not_used, if (!acpi_bus_get_device(handle, &device)) { struct acpi_scan_handler *dev_handler = device->handler; - device->removal_type = ACPI_BUS_REMOVAL_EJECT; if (dev_handler) { if (dev_handler->detach) dev_handler->detach(device); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 4d5d3e7ba33d..1a681ee2aa08 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -63,13 +63,6 @@ acpi_get_physical_device_location(acpi_handle handle, struct acpi_pld_info **pld #define ACPI_BUS_FILE_ROOT "acpi" extern struct proc_dir_entry *acpi_root_dir; -enum acpi_bus_removal_type { - ACPI_BUS_REMOVAL_NORMAL = 0, - ACPI_BUS_REMOVAL_EJECT, - ACPI_BUS_REMOVAL_SUPRISE, - ACPI_BUS_REMOVAL_TYPE_COUNT -}; - enum acpi_bus_device_type { ACPI_BUS_TYPE_DEVICE = 0, ACPI_BUS_TYPE_POWER, @@ -311,7 +304,6 @@ struct acpi_device { struct acpi_driver *driver; void *driver_data; struct device dev; - enum acpi_bus_removal_type removal_type; /* indicate for different removal type */ u8 
physical_node_count; struct list_head physical_node_list; struct mutex physical_node_lock; From be547436c22c3b7d934d9afd841cfd7a6807f7ab Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 May 2013 10:40:56 +0200 Subject: [PATCH 11/18] ACPI / processor: Pass processor object handle to acpi_bind_one() Make acpi_processor_add() pass the ACPI handle of the processor namespace object to acpi_bind_one() instead of setting it directly to allow acpi_bind_one() to catch possible bugs causing the ACPI handle of the processor device to be set earlier. Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani --- drivers/acpi/acpi_processor.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index cae2641e8d84..157e7389a5ff 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -393,8 +393,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device, per_cpu(processors, pr->id) = pr; dev = get_cpu_device(pr->id); - ACPI_HANDLE_SET(dev, pr->handle); - result = acpi_bind_one(dev, NULL); + result = acpi_bind_one(dev, pr->handle); if (result) goto err; From ea50be59345a2b714fd3ed43e1bba89906c177c3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 May 2013 10:41:50 +0200 Subject: [PATCH 12/18] Driver core / MM: Drop offline_memory_block() Since offline_memory_block(mem) is functionally equivalent to device_offline(&mem->dev), make the only caller of the former use the latter instead and drop offline_memory_block() entirely. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Toshi Kani --- drivers/base/memory.c | 21 --------------------- include/linux/memory_hotplug.h | 1 - mm/memory_hotplug.c | 2 +- 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c7092bc3c01e..4ebf97f99fae 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -728,27 +728,6 @@ int unregister_memory_section(struct mem_section *section) } #endif /* CONFIG_MEMORY_HOTREMOVE */ -/* - * offline one memory block. If the memory block has been offlined, do nothing. - * - * Call under device_hotplug_lock. - */ -int offline_memory_block(struct memory_block *mem) -{ - int ret = 0; - - mutex_lock(&mem->state_mutex); - if (mem->state != MEM_OFFLINE) { - ret = __memory_block_change_state_uevent(mem, MEM_OFFLINE, - MEM_ONLINE, -1); - if (!ret) - mem->dev.offline = true; - } - mutex_unlock(&mem->state_mutex); - - return ret; -} - /* return true if the memory block is offlined, otherwise, return false */ bool is_memblock_offlined(struct memory_block *mem) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 2975b7b2a9d8..ae5480a00963 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -251,7 +251,6 @@ extern int mem_online_node(int nid); extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); -extern int offline_memory_block(struct memory_block *mem); extern bool is_memblock_offlined(struct memory_block *mem); extern int remove_memory(int nid, u64 start, u64 size); extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5ea1287ee91f..a39841d240e8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1680,7 +1680,7 @@ int walk_memory_range(unsigned long start_pfn, unsigned long 
end_pfn, static int offline_memory_block_cb(struct memory_block *mem, void *arg) { int *ret = arg; - int error = offline_memory_block(mem); + int error = device_offline(&mem->dev); if (error != 0 && *ret == 0) *ret = error; From 303bfdb1a14d0460feb859cd008ff81da36b517c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 May 2013 10:43:13 +0200 Subject: [PATCH 13/18] ACPI / scan: Add second pass of companion offlining to hot-remove code As indicated by comments in mm/memory_hotplug.c:remove_memory(), if CONFIG_MEMCG is set, it may not be possible to offline all of the memory blocks held by one module (FRU) in one pass (because one of them may be used by the others to store page cgroup in that case and that block has to be offlined before the other ones). To handle that arguably corner case, add a second pass of companion device offlining to acpi_scan_hot_remove() and make it ignore errors returned in the first pass (and make it skip the second pass if the first one is successful). Signed-off-by: Rafael J. Wysocki Acked-by: Toshi Kani --- drivers/acpi/scan.c | 67 +++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index ba8ee6cbf0f1..2959fe1ce43e 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -131,6 +131,7 @@ static acpi_status acpi_bus_offline_companions(acpi_handle handle, u32 lvl, { struct acpi_device *device = NULL; struct acpi_device_physical_node *pn; + bool second_pass = (bool)data; acpi_status status = AE_OK; if (acpi_bus_get_device(handle, &device)) @@ -141,15 +142,26 @@ static acpi_status acpi_bus_offline_companions(acpi_handle handle, u32 lvl, list_for_each_entry(pn, &device->physical_node_list, node) { int ret; + if (second_pass) { + /* Skip devices offlined by the first pass. 
*/ + if (pn->put_online) + continue; + } else { + pn->put_online = false; + } ret = device_offline(pn->dev); if (acpi_force_hot_remove) continue; - if (ret < 0) { - status = AE_ERROR; - break; + if (ret >= 0) { + pn->put_online = !ret; + } else { + *ret_p = pn->dev; + if (second_pass) { + status = AE_ERROR; + break; + } } - pn->put_online = !ret; } mutex_unlock(&device->physical_node_lock); @@ -185,6 +197,7 @@ static int acpi_scan_hot_remove(struct acpi_device *device) acpi_handle not_used; struct acpi_object_list arg_list; union acpi_object arg; + struct device *errdev; acpi_status status; unsigned long long sta; @@ -197,22 +210,42 @@ static int acpi_scan_hot_remove(struct acpi_device *device) lock_device_hotplug(); - status = acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, - NULL, acpi_bus_offline_companions, NULL, - NULL); - if (ACPI_SUCCESS(status) || acpi_force_hot_remove) - status = acpi_bus_offline_companions(handle, 0, NULL, NULL); - - if (ACPI_FAILURE(status) && !acpi_force_hot_remove) { - acpi_bus_online_companions(handle, 0, NULL, NULL); + /* + * Carry out two passes here and ignore errors in the first pass, + * because if the devices in question are memory blocks and + * CONFIG_MEMCG is set, one of the blocks may hold data structures + * that the other blocks depend on, but it is not known in advance which + * block holds them. + * + * If the first pass is successful, the second one isn't needed, though. 
+ */ + errdev = NULL; + acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, + NULL, acpi_bus_offline_companions, + (void *)false, (void **)&errdev); + acpi_bus_offline_companions(handle, 0, (void *)false, (void **)&errdev); + if (errdev) { + errdev = NULL; acpi_walk_namespace(ACPI_TYPE_ANY, handle, ACPI_UINT32_MAX, - acpi_bus_online_companions, NULL, NULL, - NULL); + NULL, acpi_bus_offline_companions, + (void *)true , (void **)&errdev); + if (!errdev || acpi_force_hot_remove) + acpi_bus_offline_companions(handle, 0, (void *)true, + (void **)&errdev); - unlock_device_hotplug(); + if (errdev && !acpi_force_hot_remove) { + dev_warn(errdev, "Offline failed.\n"); + acpi_bus_online_companions(handle, 0, NULL, NULL); + acpi_walk_namespace(ACPI_TYPE_ANY, handle, + ACPI_UINT32_MAX, + acpi_bus_online_companions, NULL, + NULL, NULL); - put_device(&device->dev); - return -EBUSY; + unlock_device_hotplug(); + + put_device(&device->dev); + return -EBUSY; + } } ACPI_DEBUG_PRINT((ACPI_DB_INFO, From 242831eb15a06fa4414eaa705fdc6dd432ab98d1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 May 2013 12:58:46 +0200 Subject: [PATCH 14/18] Memory hotplug / ACPI: Simplify memory removal Now that the memory offlining should be taken care of by the companion device offlining code in acpi_scan_hot_remove(), the ACPI memory hotplug driver doesn't need to offline it in remove_memory() any more. Moreover, since the return value of remove_memory() is not used, it's better to make it be a void function and trigger a BUG() if the memory scheduled for removal is not offline. Change the code in accordance with the above observations. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Toshi Kani --- drivers/acpi/acpi_memhotplug.c | 13 ++----- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 71 ++++------------------------------ 3 files changed, 12 insertions(+), 74 deletions(-) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 5590db12028e..c711d1144044 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -271,13 +271,11 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) return 0; } -static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) +static void acpi_memory_remove_memory(struct acpi_memory_device *mem_device) { acpi_handle handle = mem_device->device->handle; - int result = 0, nid; struct acpi_memory_info *info, *n; - - nid = acpi_get_node(handle); + int nid = acpi_get_node(handle); list_for_each_entry_safe(info, n, &mem_device->res_list, list) { if (!info->enabled) @@ -287,15 +285,10 @@ static int acpi_memory_remove_memory(struct acpi_memory_device *mem_device) nid = memory_add_physaddr_to_nid(info->start_addr); acpi_unbind_memory_blocks(info, handle); - result = remove_memory(nid, info->start_addr, info->length); - if (result) - return result; - + remove_memory(nid, info->start_addr, info->length); list_del(&info->list); kfree(info); } - - return result; } static void acpi_memory_device_free(struct acpi_memory_device *mem_device) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ae5480a00963..00569fb4ed6a 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -252,7 +252,7 @@ extern int add_memory(int nid, u64 start, u64 size); extern int arch_add_memory(int nid, u64 start, u64 size); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); -extern int remove_memory(int nid, u64 start, u64 size); +extern void remove_memory(int nid, u64 start, u64 size); 
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, int nr_pages); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a39841d240e8..7026fbc42aaa 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1670,24 +1670,6 @@ int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, } #ifdef CONFIG_MEMORY_HOTREMOVE -/** - * offline_memory_block_cb - callback function for offlining memory block - * @mem: the memory block to be offlined - * @arg: buffer to hold error msg - * - * Always return 0, and put the error msg in arg if any. - */ -static int offline_memory_block_cb(struct memory_block *mem, void *arg) -{ - int *ret = arg; - int error = device_offline(&mem->dev); - - if (error != 0 && *ret == 0) - *ret = error; - - return 0; -} - static int is_memblock_offlined_cb(struct memory_block *mem, void *arg) { int ret = !is_memblock_offlined(mem); @@ -1813,54 +1795,22 @@ void try_offline_node(int nid) } EXPORT_SYMBOL(try_offline_node); -int __ref remove_memory(int nid, u64 start, u64 size) +void __ref remove_memory(int nid, u64 start, u64 size) { - unsigned long start_pfn, end_pfn; - int ret = 0; - int retry = 1; - - start_pfn = PFN_DOWN(start); - end_pfn = PFN_UP(start + size - 1); - - /* - * When CONFIG_MEMCG is on, one memory block may be used by other - * blocks to store page cgroup when onlining pages. But we don't know - * in what order pages are onlined. So we iterate twice to offline - * memory: - * 1st iterate: offline every non primary memory block. - * 2nd iterate: offline primary (i.e. first added) memory block. - */ -repeat: - walk_memory_range(start_pfn, end_pfn, &ret, - offline_memory_block_cb); - if (ret) { - if (!retry) - return ret; - - retry = 0; - ret = 0; - goto repeat; - } + int ret; lock_memory_hotplug(); /* - * we have offlined all memory blocks like this: - * 1. lock memory hotplug - * 2. 
offline a memory block - * 3. unlock memory hotplug - * - * repeat step1-3 to offline the memory block. All memory blocks - * must be offlined before removing memory. But we don't hold the - * lock in the whole operation. So we should check whether all - * memory blocks are offlined. + * All memory blocks must be offlined before removing memory. Check + * whether all memory blocks in question are offline and trigger a BUG() + * if this is not the case. */ - - ret = walk_memory_range(start_pfn, end_pfn, NULL, + ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, is_memblock_offlined_cb); if (ret) { unlock_memory_hotplug(); - return ret; + BUG(); } /* remove memmap entry */ @@ -1871,17 +1821,12 @@ repeat: try_offline_node(nid); unlock_memory_hotplug(); - - return 0; } #else int offline_pages(unsigned long start_pfn, unsigned long nr_pages) { return -EINVAL; } -int remove_memory(int nid, u64 start, u64 size) -{ - return -EINVAL; -} +void remove_memory(int nid, u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ EXPORT_SYMBOL_GPL(remove_memory); From 173a5a4c909789fcd57d00355d2237618a3824a4 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 31 May 2013 11:36:08 +0800 Subject: [PATCH 15/18] ACPI / processor: Fix potential NULL pointer dereference in acpi_processor_add() In acpi_processor_add(), get_cpu_device() may return NULL in some cases which is then passed to acpi_bind_one() and that will cause a NULL pointer dereference to occur. Add a check to prevent that from happening. [rjw: Changelog] Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/acpi_processor.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 157e7389a5ff..e9b01e35ac37 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -393,6 +393,11 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device, per_cpu(processors, pr->id) = pr; dev = get_cpu_device(pr->id); + if (!dev) { + result = -ENODEV; + goto err; + } + result = acpi_bind_one(dev, pr->handle); if (result) goto err; From aba6efc47133af4941cda16e690f71b7ad894da2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 1 Jun 2013 22:24:07 +0200 Subject: [PATCH 16/18] Memory hotplug: Move alternative function definitions to header Move the alternative (stub) definitions of offline_pages() and remove_memory() that are used when CONFIG_MEMORY_HOTREMOVE is not set to memory_hotplug.h, where they belong, and make them static inline. Signed-off-by: Rafael J. Wysocki --- include/linux/memory_hotplug.h | 9 +++++++++ mm/memory_hotplug.c | 8 +------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 00569fb4ed6a..dd38e62b84d2 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -234,6 +234,8 @@ static inline void unlock_memory_hotplug(void) {} extern int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); extern void try_offline_node(int nid); +extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); +extern void remove_memory(int nid, u64 start, u64 size); #else static inline int is_mem_section_removable(unsigned long pfn, @@ -243,6 +245,13 @@ static inline int is_mem_section_removable(unsigned long pfn, } static inline void try_offline_node(int nid) {} + +static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages) +{ + return -EINVAL; +} + +static inline void remove_memory(int nid, u64 start, u64 size) {} #endif /* 
CONFIG_MEMORY_HOTREMOVE */ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7026fbc42aaa..490e3d401e2c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1822,11 +1822,5 @@ void __ref remove_memory(int nid, u64 start, u64 size) unlock_memory_hotplug(); } -#else -int offline_pages(unsigned long start_pfn, unsigned long nr_pages) -{ - return -EINVAL; -} -void remove_memory(int nid, u64 start, u64 size) {} -#endif /* CONFIG_MEMORY_HOTREMOVE */ EXPORT_SYMBOL_GPL(remove_memory); +#endif /* CONFIG_MEMORY_HOTREMOVE */ From c655affbd524d0105978ecd696c3bb8a281b418b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 7 Jun 2013 13:13:31 +0200 Subject: [PATCH 17/18] ACPI / cpufreq: Add ACPI processor device IDs to acpi-cpufreq After commit ac212b6 (ACPI / processor: Use common hotplug infrastructure) the acpi-cpufreq module is not loaded automatically by udev which fails to match it against the x86cpu modalias. Still, it can be matched against ACPI processor device IDs, which even makes more sense, because it depends on the ACPI processor driver that uses those device IDs to bind to processor devices. For this reason, add ACPI processor device IDs to acpi-cpufreq. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/acpi-cpufreq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 11b8b4b54ceb..4a9ca0149719 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -1034,4 +1034,11 @@ static const struct x86_cpu_id acpi_cpufreq_ids[] = { }; MODULE_DEVICE_TABLE(x86cpu, acpi_cpufreq_ids); +static const struct acpi_device_id processor_device_ids[] = { + {ACPI_PROCESSOR_OBJECT_HID, }, + {ACPI_PROCESSOR_DEVICE_HID, }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, processor_device_ids); + MODULE_ALIAS("acpi"); From 08f502c1c343031f0d126bd00e87dede38269d12 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 18 Jun 2013 15:06:45 -0600 Subject: [PATCH 18/18] ACPI: Do not use CONFIG_ACPI_HOTPLUG_MEMORY_MODULE CONFIG_ACPI_HOTPLUG_MEMORY has been changed to bool (y/n), and its module option is no longer valid. So, stop using CONFIG_ACPI_HOTPLUG_MEMORY_MODULE. Signed-off-by: Toshi Kani Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 17b5b5967641..353ba256f368 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -352,8 +352,7 @@ extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, /* Enable _OST when all relevant hotplug operations are enabled */ #if defined(CONFIG_ACPI_HOTPLUG_CPU) && \ - (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \ - defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) && \ + defined(CONFIG_ACPI_HOTPLUG_MEMORY) && \ defined(CONFIG_ACPI_CONTAINER) #define ACPI_HOTPLUG_OST #endif