OpenCloudOS-Kernel/drivers/cxl/acpi.c

339 lines
8.5 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/pci.h>
#include "cxlpci.h"
#include "cxl.h"
/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
#define CFMWS_INTERLEAVE_WAYS(x) (1 << (x)->interleave_ways)
#define CFMWS_INTERLEAVE_GRANULARITY(x) ((x)->granularity + 8)
static unsigned long cfmws_to_decoder_flags(int restrictions)
{
unsigned long flags = CXL_DECODER_F_ENABLE;
if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2)
flags |= CXL_DECODER_F_TYPE2;
if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE3)
flags |= CXL_DECODER_F_TYPE3;
if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_VOLATILE)
flags |= CXL_DECODER_F_RAM;
if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_PMEM)
flags |= CXL_DECODER_F_PMEM;
if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_FIXED)
flags |= CXL_DECODER_F_LOCK;
return flags;
}
static int cxl_acpi_cfmws_verify(struct device *dev,
struct acpi_cedt_cfmws *cfmws)
{
int expected_len;
if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) {
dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n");
return -EINVAL;
}
if (!IS_ALIGNED(cfmws->base_hpa, SZ_256M)) {
dev_err(dev, "CFMWS Base HPA not 256MB aligned\n");
return -EINVAL;
}
if (!IS_ALIGNED(cfmws->window_size, SZ_256M)) {
dev_err(dev, "CFMWS Window Size not 256MB aligned\n");
return -EINVAL;
}
if (CFMWS_INTERLEAVE_WAYS(cfmws) > CXL_DECODER_MAX_INTERLEAVE) {
dev_err(dev, "CFMWS Interleave Ways (%d) too large\n",
CFMWS_INTERLEAVE_WAYS(cfmws));
return -EINVAL;
}
expected_len = struct_size((cfmws), interleave_targets,
CFMWS_INTERLEAVE_WAYS(cfmws));
if (cfmws->header.length < expected_len) {
dev_err(dev, "CFMWS length %d less than expected %d\n",
cfmws->header.length, expected_len);
return -EINVAL;
}
if (cfmws->header.length > expected_len)
dev_dbg(dev, "CFMWS length %d greater than expected %d\n",
cfmws->header.length, expected_len);
return 0;
}
struct cxl_cfmws_context {
struct device *dev;
struct cxl_port *root_port;
};
static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
int target_map[CXL_DECODER_MAX_INTERLEAVE];
struct cxl_cfmws_context *ctx = arg;
struct cxl_port *root_port = ctx->root_port;
struct device *dev = ctx->dev;
struct acpi_cedt_cfmws *cfmws;
struct cxl_decoder *cxld;
int rc, i;
cfmws = (struct acpi_cedt_cfmws *) header;
rc = cxl_acpi_cfmws_verify(dev, cfmws);
if (rc) {
dev_err(dev, "CFMWS range %#llx-%#llx not registered\n",
cfmws->base_hpa,
cfmws->base_hpa + cfmws->window_size - 1);
return 0;
}
for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++)
target_map[i] = cfmws->interleave_targets[i];
cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws));
if (IS_ERR(cxld))
return 0;
cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions);
cxld->target_type = CXL_DECODER_EXPANDER;
cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa,
cfmws->window_size);
cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws);
cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws);
rc = cxl_decoder_add(cxld, target_map);
if (rc)
put_device(&cxld->dev);
else
rc = cxl_decoder_autoremove(dev, cxld);
if (rc) {
dev_err(dev, "Failed to add decoder for %pr\n",
&cxld->platform_res);
return 0;
}
dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev),
phys_to_target_node(cxld->platform_res.start),
&cxld->platform_res);
return 0;
}
tools/testing/cxl: Introduce a mocked-up CXL port hierarchy Create an environment for CXL plumbing unit tests. Especially when it comes to an algorithm for HDM Decoder (Host-managed Device Memory Decoder) programming, the availability of an in-kernel-tree emulation environment for CXL configuration complexity and corner cases speeds development and deters regressions. The approach taken mirrors what was done for tools/testing/nvdimm/. I.e. an external module, cxl_test.ko built out of the tools/testing/cxl/ directory, provides mock implementations of kernel APIs and kernel objects to simulate a real world device hierarchy. One feedback for the tools/testing/nvdimm/ proposal was "why not do this in QEMU?". In fact, the CXL development community has developed a QEMU model for CXL [1]. However, there are a few blocking issues that keep QEMU from being a tight fit for topology + provisioning unit tests: 1/ The QEMU community has yet to show interest in merging any of this support that has had patches on the list since November 2020. So, testing CXL to date involves building custom QEMU with out-of-tree patches. 2/ CXL mechanisms like cross-host-bridge interleave do not have a clear path to be emulated by QEMU without major infrastructure work. This is easier to achieve with the alloc_mock_res() approach taken in this patch to shortcut-define emulated system physical address ranges with interleave behavior. The QEMU enabling has been critical to get the driver off the ground, and may still move forward, but it does not address the ongoing needs of a regression testing environment and test driven development. This patch adds an ACPI CXL Platform definition with emulated CXL multi-ported host-bridges. A follow on patch adds emulated memory expander devices. Acked-by: Ben Widawsky <ben.widawsky@intel.com> Reported-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/20210202005948.241655-1-ben.widawsky@intel.com [1] Link: https://lore.kernel.org/r/163164680798.2831381.838684634806668012.stgit@dwillia2-desk3.amr.corp.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2021-09-15 03:14:22 +08:00
__mock struct acpi_device *to_cxl_host_bridge(struct device *host,
struct device *dev)
{
struct acpi_device *adev = to_acpi_device(dev);
if (!acpi_pci_find_root(adev->handle))
return NULL;
if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0)
return adev;
return NULL;
}
/*
* A host bridge is a dport to a CFMWS decode and it is a uport to the
* dport (PCIe Root Ports) in the host bridge.
*/
static int add_host_bridge_uport(struct device *match, void *arg)
{
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
tools/testing/cxl: Introduce a mocked-up CXL port hierarchy Create an environment for CXL plumbing unit tests. Especially when it comes to an algorithm for HDM Decoder (Host-managed Device Memory Decoder) programming, the availability of an in-kernel-tree emulation environment for CXL configuration complexity and corner cases speeds development and deters regressions. The approach taken mirrors what was done for tools/testing/nvdimm/. I.e. an external module, cxl_test.ko built out of the tools/testing/cxl/ directory, provides mock implementations of kernel APIs and kernel objects to simulate a real world device hierarchy. One feedback for the tools/testing/nvdimm/ proposal was "why not do this in QEMU?". In fact, the CXL development community has developed a QEMU model for CXL [1]. However, there are a few blocking issues that keep QEMU from being a tight fit for topology + provisioning unit tests: 1/ The QEMU community has yet to show interest in merging any of this support that has had patches on the list since November 2020. So, testing CXL to date involves building custom QEMU with out-of-tree patches. 2/ CXL mechanisms like cross-host-bridge interleave do not have a clear path to be emulated by QEMU without major infrastructure work. This is easier to achieve with the alloc_mock_res() approach taken in this patch to shortcut-define emulated system physical address ranges with interleave behavior. The QEMU enabling has been critical to get the driver off the ground, and may still move forward, but it does not address the ongoing needs of a regression testing environment and test driven development. This patch adds an ACPI CXL Platform definition with emulated CXL multi-ported host-bridges. A follow on patch adds emulated memory expander devices. Acked-by: Ben Widawsky <ben.widawsky@intel.com> Reported-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/20210202005948.241655-1-ben.widawsky@intel.com [1] Link: https://lore.kernel.org/r/163164680798.2831381.838684634806668012.stgit@dwillia2-desk3.amr.corp.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2021-09-15 03:14:22 +08:00
struct acpi_device *bridge = to_cxl_host_bridge(host, match);
struct acpi_pci_root *pci_root;
struct cxl_dport *dport;
struct cxl_port *port;
int rc;
if (!bridge)
return 0;
dport = cxl_find_dport_by_dev(root_port, match);
if (!dport) {
dev_dbg(host, "host bridge expected and not found\n");
return 0;
}
/*
* Note that this lookup already succeeded in
* to_cxl_host_bridge(), so no need to check for failure here
*/
pci_root = acpi_pci_find_root(bridge->handle);
rc = devm_cxl_register_pci_bus(host, match, pci_root->bus);
if (rc)
return rc;
port = devm_cxl_add_port(host, match, dport->component_reg_phys,
root_port);
if (IS_ERR(port))
return PTR_ERR(port);
dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
cxl/port: Add a driver for 'struct cxl_port' objects The need for a CXL port driver and a dedicated cxl_bus_type is driven by a need to simultaneously support 2 independent physical memory decode domains (cache coherent CXL.mem and uncached PCI.mmio) that also intersect at a single PCIe device node. A CXL Port is a device that advertises a CXL Component Register block with an "HDM Decoder Capability Structure". >From Documentation/driver-api/cxl/memory-devices.rst: Similar to how a RAID driver takes disk objects and assembles them into a new logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and assemble them into a CXL.mem decode topology. The need for runtime configuration of the CXL.mem topology is also similar to RAID in that different environments with the same hardware configuration may decide to assemble the topology in contrasting ways. One may choose performance (RAID0) striping memory across multiple Host Bridges and endpoints while another may opt for fault tolerance and disable any striping in the CXL.mem topology. The port driver identifies whether an endpoint Memory Expander is connected to a CXL topology. If an active (bound to the 'cxl_port' driver) CXL Port is not found at every PCIe Switch Upstream port and an active "root" CXL Port then the device is just a plain PCIe endpoint only capable of participating in PCI.mmio and DMA cycles, not CXL.mem coherent interleave sets. The 'cxl_port' driver lets the CXL subsystem leverage driver-core infrastructure for setup and teardown of register resources and communicating device activation status to userspace. The cxl_bus_type can rendezvous the async arrival of platform level CXL resources (via the 'cxl_acpi' driver) with the asynchronous enumeration of Memory Expander endpoints, while also implementing a hierarchical locking model independent of the associated 'struct pci_dev' locking model. The locking for dport and decoder enumeration is now handled in the core rather than callers. For now the port driver only enumerates and registers CXL resources (downstream port metadata and decoder resources) later it will be used to take action on its decoders in response to CXL.mem region provisioning requests. Note1: cxlpci.h has long depended on pci.h, but port.c was the first to not include pci.h. Carry that dependency in cxlpci.h. Note2: cxl port enumeration and probing complicates CXL subsystem init to the point that it helps to have centralized debug logging of probe events in cxl_bus_probe(). Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Co-developed-by: Dan Williams <dan.j.williams@intel.com> Link: https://lore.kernel.org/r/164374948116.464348.1772618057599155408.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-02-02 05:07:51 +08:00
return 0;
}
struct cxl_chbs_context {
struct device *dev;
unsigned long long uid;
resource_size_t chbcr;
};
static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
const unsigned long end)
{
struct cxl_chbs_context *ctx = arg;
struct acpi_cedt_chbs *chbs;
if (ctx->chbcr)
return 0;
chbs = (struct acpi_cedt_chbs *) header;
if (ctx->uid != chbs->uid)
return 0;
ctx->chbcr = chbs->base;
return 0;
}
static int add_host_bridge_dport(struct device *match, void *arg)
{
acpi_status status;
unsigned long long uid;
struct cxl_dport *dport;
struct cxl_chbs_context ctx;
struct cxl_port *root_port = arg;
struct device *host = root_port->dev.parent;
tools/testing/cxl: Introduce a mocked-up CXL port hierarchy Create an environment for CXL plumbing unit tests. Especially when it comes to an algorithm for HDM Decoder (Host-managed Device Memory Decoder) programming, the availability of an in-kernel-tree emulation environment for CXL configuration complexity and corner cases speeds development and deters regressions. The approach taken mirrors what was done for tools/testing/nvdimm/. I.e. an external module, cxl_test.ko built out of the tools/testing/cxl/ directory, provides mock implementations of kernel APIs and kernel objects to simulate a real world device hierarchy. One feedback for the tools/testing/nvdimm/ proposal was "why not do this in QEMU?". In fact, the CXL development community has developed a QEMU model for CXL [1]. However, there are a few blocking issues that keep QEMU from being a tight fit for topology + provisioning unit tests: 1/ The QEMU community has yet to show interest in merging any of this support that has had patches on the list since November 2020. So, testing CXL to date involves building custom QEMU with out-of-tree patches. 2/ CXL mechanisms like cross-host-bridge interleave do not have a clear path to be emulated by QEMU without major infrastructure work. This is easier to achieve with the alloc_mock_res() approach taken in this patch to shortcut-define emulated system physical address ranges with interleave behavior. The QEMU enabling has been critical to get the driver off the ground, and may still move forward, but it does not address the ongoing needs of a regression testing environment and test driven development. This patch adds an ACPI CXL Platform definition with emulated CXL multi-ported host-bridges. A follow on patch adds emulated memory expander devices. Acked-by: Ben Widawsky <ben.widawsky@intel.com> Reported-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/20210202005948.241655-1-ben.widawsky@intel.com [1] Link: https://lore.kernel.org/r/163164680798.2831381.838684634806668012.stgit@dwillia2-desk3.amr.corp.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2021-09-15 03:14:22 +08:00
struct acpi_device *bridge = to_cxl_host_bridge(host, match);
if (!bridge)
return 0;
status = acpi_evaluate_integer(bridge->handle, METHOD_NAME__UID, NULL,
&uid);
if (status != AE_OK) {
dev_err(host, "unable to retrieve _UID of %s\n",
dev_name(match));
return -ENODEV;
}
ctx = (struct cxl_chbs_context) {
.dev = host,
.uid = uid,
};
acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbcr, &ctx);
if (ctx.chbcr == 0) {
dev_warn(host, "No CHBS found for Host Bridge: %s\n",
dev_name(match));
return 0;
}
dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr);
if (IS_ERR(dport)) {
dev_err(host, "failed to add downstream port: %s\n",
dev_name(match));
return PTR_ERR(dport);
}
dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match));
return 0;
}
static int add_root_nvdimm_bridge(struct device *match, void *data)
{
struct cxl_decoder *cxld;
struct cxl_port *root_port = data;
struct cxl_nvdimm_bridge *cxl_nvb;
struct device *host = root_port->dev.parent;
if (!is_root_decoder(match))
return 0;
cxld = to_cxl_decoder(match);
if (!(cxld->flags & CXL_DECODER_F_PMEM))
return 0;
cxl_nvb = devm_cxl_add_nvdimm_bridge(host, root_port);
if (IS_ERR(cxl_nvb)) {
dev_dbg(host, "failed to register pmem\n");
return PTR_ERR(cxl_nvb);
}
dev_dbg(host, "%s: add: %s\n", dev_name(&root_port->dev),
dev_name(&cxl_nvb->dev));
return 1;
}
static int cxl_acpi_probe(struct platform_device *pdev)
{
int rc;
struct cxl_port *root_port;
struct device *host = &pdev->dev;
struct acpi_device *adev = ACPI_COMPANION(host);
struct cxl_cfmws_context ctx;
root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL);
if (IS_ERR(root_port))
return PTR_ERR(root_port);
dev_dbg(host, "add: %s\n", dev_name(&root_port->dev));
rc = bus_for_each_dev(adev->dev.bus, NULL, root_port,
add_host_bridge_dport);
if (rc < 0)
return rc;
ctx = (struct cxl_cfmws_context) {
.dev = host,
.root_port = root_port,
};
acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, cxl_parse_cfmws, &ctx);
/*
* Root level scanned with host-bridge as dports, now scan host-bridges
* for their role as CXL uports to their CXL-capable PCIe Root Ports.
*/
rc = bus_for_each_dev(adev->dev.bus, NULL, root_port,
add_host_bridge_uport);
if (rc < 0)
return rc;
if (IS_ENABLED(CONFIG_CXL_PMEM))
rc = device_for_each_child(&root_port->dev, root_port,
add_root_nvdimm_bridge);
if (rc < 0)
return rc;
cxl/mem: Add the cxl_mem driver At this point the subsystem can enumerate all CXL ports (CXL.mem decode resources in upstream switch ports and host bridges) in a system. The last mile is connecting those ports to endpoints. The cxl_mem driver connects an endpoint device to the platform CXL.mem protoctol decode-topology. At ->probe() time it walks its device-topology-ancestry and adds a CXL Port object at every Upstream Port hop until it gets to CXL root. The CXL root object is only present after a platform firmware driver registers platform CXL resources. For ACPI based platform this is managed by the ACPI0017 device and the cxl_acpi driver. The ports are registered such that disabling a given port automatically unregisters all descendant ports, and the chain can only be registered after the root is established. Given ACPI device scanning may run asynchronously compared to PCI device scanning the root driver is tasked with rescanning the bus after the root successfully probes. Conversely if any ports in a chain between the root and an endpoint becomes disconnected it subsequently triggers the endpoint to unregister. Given lock depenedencies the endpoint unregistration happens in a workqueue asynchronously. If userspace cares about synchronizing delayed work after port events the /sys/bus/cxl/flush attribute is available for that purpose. Reported-by: Randy Dunlap <rdunlap@infradead.org> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> [djbw: clarify changelog, rework hotplug support] Link: https://lore.kernel.org/r/164398782997.903003.9725273241627693186.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2022-02-04 23:18:31 +08:00
/* In case PCI is scanned before ACPI re-trigger memdev attach */
return cxl_bus_rescan();
}
static const struct acpi_device_id cxl_acpi_ids[] = {
{ "ACPI0017" },
tools/testing/cxl: Introduce a mocked-up CXL port hierarchy Create an environment for CXL plumbing unit tests. Especially when it comes to an algorithm for HDM Decoder (Host-managed Device Memory Decoder) programming, the availability of an in-kernel-tree emulation environment for CXL configuration complexity and corner cases speeds development and deters regressions. The approach taken mirrors what was done for tools/testing/nvdimm/. I.e. an external module, cxl_test.ko built out of the tools/testing/cxl/ directory, provides mock implementations of kernel APIs and kernel objects to simulate a real world device hierarchy. One feedback for the tools/testing/nvdimm/ proposal was "why not do this in QEMU?". In fact, the CXL development community has developed a QEMU model for CXL [1]. However, there are a few blocking issues that keep QEMU from being a tight fit for topology + provisioning unit tests: 1/ The QEMU community has yet to show interest in merging any of this support that has had patches on the list since November 2020. So, testing CXL to date involves building custom QEMU with out-of-tree patches. 2/ CXL mechanisms like cross-host-bridge interleave do not have a clear path to be emulated by QEMU without major infrastructure work. This is easier to achieve with the alloc_mock_res() approach taken in this patch to shortcut-define emulated system physical address ranges with interleave behavior. The QEMU enabling has been critical to get the driver off the ground, and may still move forward, but it does not address the ongoing needs of a regression testing environment and test driven development. This patch adds an ACPI CXL Platform definition with emulated CXL multi-ported host-bridges. A follow on patch adds emulated memory expander devices. Acked-by: Ben Widawsky <ben.widawsky@intel.com> Reported-by: Vishal Verma <vishal.l.verma@intel.com> Link: https://lore.kernel.org/r/20210202005948.241655-1-ben.widawsky@intel.com [1] Link: https://lore.kernel.org/r/163164680798.2831381.838684634806668012.stgit@dwillia2-desk3.amr.corp.intel.com Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2021-09-15 03:14:22 +08:00
{ },
};
MODULE_DEVICE_TABLE(acpi, cxl_acpi_ids);
static struct platform_driver cxl_acpi_driver = {
.probe = cxl_acpi_probe,
.driver = {
.name = KBUILD_MODNAME,
.acpi_match_table = cxl_acpi_ids,
},
};
module_platform_driver(cxl_acpi_driver);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
MODULE_IMPORT_NS(ACPI);