From 68cdd3d2af6964dae2f8d9b53ee94f740dcbda35 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:28:44 -0800 Subject: [PATCH 01/48] cxl: Rename CXL_MEM to CXL_PCI The cxl_mem module was renamed cxl_pci in commit 21e9f76733a8 ("cxl: Rename mem to pci"). In preparation for adding an ancillary driver for cxl_memdev devices (registered on the cxl bus by cxl_pci), go ahead and rename CONFIG_CXL_MEM to CONFIG_CXL_PCI. Free up the CXL_MEM name for that new driver to manage CXL.mem endpoint operations. Suggested-by: Dan Williams Reviewed-by: Jonathan Cameron Signed-off-by: Ben Widawsky Link: https://lore.kernel.org/r/164298412409.3018233.12407355692407890752.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/Kconfig | 23 ++++++++++++----------- drivers/cxl/Makefile | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 67c91378f2dd..ef05e96f8f97 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -13,25 +13,26 @@ menuconfig CXL_BUS if CXL_BUS -config CXL_MEM - tristate "CXL.mem: Memory Devices" +config CXL_PCI + tristate "PCI manageability" default CXL_BUS help - The CXL.mem protocol allows a device to act as a provider of - "System RAM" and/or "Persistent Memory" that is fully coherent - as if the memory was attached to the typical CPU memory - controller. + The CXL specification defines a "CXL memory device" sub-class in the + PCI "memory controller" base class of devices. Device's identified by + this class code provide support for volatile and / or persistent + memory to be mapped into the system address map (Host-managed Device + Memory (HDM)). - Say 'y/m' to enable a driver that will attach to CXL.mem devices for - configuration and management primarily via the mailbox interface. See - Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification for more - details. + Say 'y/m' to enable a driver that will attach to CXL memory expander + devices enumerated by the memory device class code for configuration + and management primarily via the mailbox interface. See Chapter 2.3 + Type 3 CXL Device in the CXL 2.0 specification for more details. If unsure say 'm'. config CXL_MEM_RAW_COMMANDS bool "RAW Command Interface for Memory Devices" - depends on CXL_MEM + depends on CXL_PCI help Enable CXL RAW command interface. diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index d1aaabc940f3..cf07ae6cea17 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += core/ -obj-$(CONFIG_CXL_MEM) += cxl_pci.o +obj-$(CONFIG_CXL_PCI) += cxl_pci.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o From 229e8828c206be8fa0a159f6ff71e3b1a0484233 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 31 Jan 2022 15:51:45 -0800 Subject: [PATCH 02/48] cxl/pci: Implement Interface Ready Timeout The original driver implementation used the doorbell timeout for the Mailbox Interface Ready bit to piggy back off of, since the latter does not have a defined timeout. This functionality, introduced in commit 8adaf747c9f0 ("cxl/mem: Find device capabilities"), needs improvement as the recent "Add Mailbox Ready Time" ECN timeout indicates that the mailbox ready time can be significantly longer that 2 seconds. While the specification limits the maximum timeout to 256s, the cxl_pci driver gives up on the mailbox after 60s. This value corresponds with important timeout values already present in the kernel. A module parameter is provided as an emergency override and represents the default Linux policy for all devices. Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron [djbw: add modparam, drop check_device_status()] Co-developed-by: Dan Williams Link: https://lore.kernel.org/r/164367306565.208548.1932299464604450843.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8dc91fd3396a..cc0cdd7e9de3 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ #include +#include #include +#include #include #include #include @@ -35,6 +37,20 @@ /* CXL 2.0 - 8.2.8.4 */ #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ) +/* + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to + * dictate how long to wait for the mailbox to become ready. The new + * field allows the device to tell software the amount of time to wait + * before mailbox ready. This field per the spec theoretically allows + * for up to 255 seconds. 255 seconds is unreasonably long, its longer + * than the maximum SATA port link recovery wait. Default to 60 seconds + * until someone builds a CXL device that needs more time in practice. + */ +static unsigned short mbox_ready_timeout = 60; +module_param(mbox_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(mbox_ready_timeout, + "seconds to wait for mailbox ready status"); + static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { const unsigned long start = jiffies; @@ -281,6 +297,25 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) { const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); + unsigned long timeout; + u64 md_status; + + timeout = jiffies + mbox_ready_timeout * HZ; + do { + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (md_status & CXLMDEV_MBOX_IF_READY) + break; + if (msleep_interruptible(100)) + break; + } while (!time_after(jiffies, timeout)); + + if (!(md_status & CXLMDEV_MBOX_IF_READY)) { + dev_err(cxlds->dev, + "timeout awaiting mailbox ready, device state:%s%s\n", + md_status & CXLMDEV_DEV_FATAL ? " fatal" : "", + md_status & CXLMDEV_FW_HALT ? " firmware-halt" : ""); + return -EIO; + } cxlds->mbox_send = cxl_pci_mbox_send; cxlds->payload_size = From 4f195ee73ade1adf8326e5ed5fb271da51778991 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:28:54 -0800 Subject: [PATCH 03/48] cxl/pci: Defer mailbox status checks to command timeouts Device status can change without warning at any point in time. This effectively means that no amount of status checking before a command is submitted can guarantee that the device is not in an error condition when the command is later submitted. The clearest signal that a device is not able to process commands is if it fails to process commands. With the above understanding in hand, update cxl_pci_setup_mailbox() to validate the readiness of the mailbox once at the beginning of time, and then use timeouts and busy sequencing errors as the only occasions to report status. Just as before, unless and until the driver gains a reset recovery path, doorbell clearing failures by the device are fatal to mailbox operations. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298413480.3018233.9643395389297971819.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 134 ++++++++++++---------------------------------- 1 file changed, 33 insertions(+), 101 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index cc0cdd7e9de3..baeea0c2e619 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -73,14 +73,16 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) return 0; } -static void cxl_pci_mbox_timeout(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *mbox_cmd) -{ - struct device *dev = cxlds->dev; +#define cxl_err(dev, status, msg) \ + dev_err_ratelimited(dev, msg ", device state %s%s\n", \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? " firmware-halt" : "") - dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n", - mbox_cmd->opcode, mbox_cmd->size_in); -} +#define cxl_cmd_err(dev, cmd, status, msg) \ + dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \ + (cmd)->opcode, \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? " firmware-halt" : "") /** * __cxl_pci_mbox_send_cmd() - Execute a mailbox command @@ -134,7 +136,11 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #1 */ if (cxl_doorbell_busy(cxlds)) { - dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n"); + u64 md_status = + readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, + "mailbox queue busy"); return -EBUSY; } @@ -160,7 +166,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #5 */ rc = cxl_pci_mbox_wait_for_doorbell(cxlds); if (rc == -ETIMEDOUT) { - cxl_pci_mbox_timeout(cxlds, mbox_cmd); + u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); return rc; } @@ -198,98 +206,13 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, return 0; } -/** - * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox. - * @cxlds: The device state to gain access to. - * - * Context: Any context. Takes the mbox_mutex. - * Return: 0 if exclusive access was acquired. - */ -static int cxl_pci_mbox_get(struct cxl_dev_state *cxlds) -{ - struct device *dev = cxlds->dev; - u64 md_status; - int rc; - - mutex_lock_io(&cxlds->mbox_mutex); - - /* - * XXX: There is some amount of ambiguity in the 2.0 version of the spec - * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the - * bit is to allow firmware running on the device to notify the driver - * that it's ready to receive commands. It is unclear if the bit needs - * to be read for each transaction mailbox, ie. the firmware can switch - * it on and off as needed. Second, there is no defined timeout for - * mailbox ready, like there is for the doorbell interface. - * - * Assumptions: - * 1. The firmware might toggle the Mailbox Interface Ready bit, check - * it for every command. - * - * 2. If the doorbell is clear, the firmware should have first set the - * Mailbox Interface Ready bit. Therefore, waiting for the doorbell - * to be ready is sufficient. - */ - rc = cxl_pci_mbox_wait_for_doorbell(cxlds); - if (rc) { - dev_warn(dev, "Mailbox interface not ready\n"); - goto out; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) { - dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n"); - rc = -EBUSY; - goto out; - } - - /* - * Hardware shouldn't allow a ready status but also have failure bits - * set. Spit out an error, this should be a bug report - */ - rc = -EFAULT; - if (md_status & CXLMDEV_DEV_FATAL) { - dev_err(dev, "mbox: reported ready, but fatal\n"); - goto out; - } - if (md_status & CXLMDEV_FW_HALT) { - dev_err(dev, "mbox: reported ready, but halted\n"); - goto out; - } - if (CXLMDEV_RESET_NEEDED(md_status)) { - dev_err(dev, "mbox: reported ready, but reset needed\n"); - goto out; - } - - /* with lock held */ - return 0; - -out: - mutex_unlock(&cxlds->mbox_mutex); - return rc; -} - -/** - * cxl_pci_mbox_put() - Release exclusive access to the mailbox. - * @cxlds: The device state to communicate with. - * - * Context: Any context. Expects mbox_mutex to be held. - */ -static void cxl_pci_mbox_put(struct cxl_dev_state *cxlds) -{ - mutex_unlock(&cxlds->mbox_mutex); -} - static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { int rc; - rc = cxl_pci_mbox_get(cxlds); - if (rc) - return rc; - + mutex_lock_io(&cxlds->mbox_mutex); rc = __cxl_pci_mbox_send_cmd(cxlds, cmd); - cxl_pci_mbox_put(cxlds); + mutex_unlock(&cxlds->mbox_mutex); return rc; } @@ -310,11 +233,20 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) } while (!time_after(jiffies, timeout)); if (!(md_status & CXLMDEV_MBOX_IF_READY)) { - dev_err(cxlds->dev, - "timeout awaiting mailbox ready, device state:%s%s\n", - md_status & CXLMDEV_DEV_FATAL ? " fatal" : "", - md_status & CXLMDEV_FW_HALT ? " firmware-halt" : ""); - return -EIO; + cxl_err(cxlds->dev, md_status, + "timeout awaiting mailbox ready"); + return -ETIMEDOUT; + } + + /* + * A command may be in flight from a previous driver instance, + * think kexec, do one doorbell wait so that + * __cxl_pci_mbox_send_cmd() can assume that it is the only + * source for future doorbell busy events. + */ + if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { + cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle"); + return -ETIMEDOUT; } cxlds->mbox_send = cxl_pci_mbox_send; From 46c6ad27625ca00f59903585e41667d7a45b4eb8 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:29:00 -0800 Subject: [PATCH 04/48] cxl: Flesh out register names Get a better naming scheme in place for upcoming additions. By dropping redundant usages of CXL and DVSEC where appropriate we can get more concise and also more grepable defines. Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Signed-off-by: Ben Widawsky Link: https://lore.kernel.org/r/164298414022.3018233.15522855498759815097.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 14 +++++++------- drivers/cxl/pci.h | 19 ++++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index baeea0c2e619..0981d8f375ad 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -370,10 +370,10 @@ static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *ma static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { - map->block_offset = - ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); - map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); - map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo); + map->block_offset = ((u64)reg_hi << 32) | + (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); } /** @@ -394,15 +394,15 @@ static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, int regloc, i; regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, - PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID); + CXL_DVSEC_REG_LOCATOR); if (!regloc) return -ENXIO; pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); - regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET; - regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8; + regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; + regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; for (i = 0; i < regblocks; i++, regloc += 8) { u32 reg_lo, reg_hi; diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h index 7d3e4bf06b45..29b8eaef3a0a 100644 --- a/drivers/cxl/pci.h +++ b/drivers/cxl/pci.h @@ -7,17 +7,21 @@ /* * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification + * Specification. Names are taken straight from the specification with "CXL" and + * "DVSEC" redundancies removed. When obvious, abbreviations may be used. */ #define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) #define PCI_DVSEC_VENDOR_ID_CXL 0x1E98 -#define PCI_DVSEC_ID_CXL 0x0 -#define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID 0x8 -#define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET 0xC +/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ +#define CXL_DVSEC_PCIE_DEVICE 0 -/* BAR Indicator Register (BIR) */ -#define CXL_REGLOC_BIR_MASK GENMASK(2, 0) +/* CXL 2.0 8.1.9: Register Locator DVSEC */ +#define CXL_DVSEC_REG_LOCATOR 8 +#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC +#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) /* Register Block Identifier (RBI) */ enum cxl_regloc_type { @@ -28,7 +32,4 @@ enum cxl_regloc_type { CXL_REGLOC_RBI_TYPES }; -#define CXL_REGLOC_RBI_MASK GENMASK(15, 8) -#define CXL_REGLOC_ADDR_MASK GENMASK(31, 16) - #endif /* __CXL_PCI_H__ */ From 8baa787b93dbda6b24081297b934e8edd886d4bb Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:29:05 -0800 Subject: [PATCH 05/48] cxl/pci: Add new DVSEC definitions In preparation for properly supporting memory active timeout, and later on, other attributes obtained from DVSEC fields, add the full list of DVSEC identifiers from the CXL 2.0 specification. Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron (v1) Signed-off-by: Ben Widawsky Link: https://lore.kernel.org/r/164298414567.3018233.12005290051592771878.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h index 29b8eaef3a0a..8ae2b4adc59d 100644 --- a/drivers/cxl/pci.h +++ b/drivers/cxl/pci.h @@ -16,6 +16,21 @@ /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 +/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ +#define CXL_DVSEC_FUNCTION_MAP 2 + +/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ +#define CXL_DVSEC_PORT_EXTENSIONS 3 + +/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ +#define CXL_DVSEC_PORT_GPF 4 + +/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ +#define CXL_DVSEC_DEVICE_GPF 5 + +/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ +#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 + /* CXL 2.0 8.1.9: Register Locator DVSEC */ #define CXL_DVSEC_REG_LOCATOR 8 #define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC From 303ebc1b1741b6a18349d8e5753c2d25fdb41a21 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:29:10 -0800 Subject: [PATCH 06/48] cxl/acpi: Map component registers for Root Ports This implements the TODO in cxl_acpi for mapping component registers. cxl_acpi becomes the second consumer of CXL register block enumeration (cxl_pci being the first). Moving the functionality to cxl_core allows both of these drivers to use the functionality. Equally importantly it allows cxl_core to use the functionality in the future. CXL 2.0 root ports are similar to CXL 2.0 Downstream Ports with the main distinction being they're a part of the CXL 2.0 host bridge. While mapping their component registers is not immediately useful for the CXL drivers, the movement of register block enumeration into core is a vital step towards HDM decoder programming. Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron [djbw: fix cxl_regmap_to_base() failure cases] Link: https://lore.kernel.org/r/164298415080.3018233.14694957480228676592.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 13 ++++++++-- drivers/cxl/core/regs.c | 56 +++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 4 +++ drivers/cxl/pci.c | 52 -------------------------------------- drivers/cxl/pci.h | 9 +++++++ 5 files changed, 80 insertions(+), 54 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 3163167ecc3a..c656a49a11a9 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -7,6 +7,7 @@ #include #include #include "cxl.h" +#include "pci.h" /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ #define CFMWS_INTERLEAVE_WAYS(x) (1 << (x)->interleave_ways) @@ -134,11 +135,13 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, __mock int match_add_root_ports(struct pci_dev *pdev, void *data) { + resource_size_t creg = CXL_RESOURCE_NONE; struct cxl_walk_context *ctx = data; struct pci_bus *root_bus = ctx->root; struct cxl_port *port = ctx->port; int type = pci_pcie_type(pdev); struct device *dev = ctx->dev; + struct cxl_register_map map; u32 lnkcap, port_num; int rc; @@ -152,9 +155,15 @@ __mock int match_add_root_ports(struct pci_dev *pdev, void *data) &lnkcap) != PCIBIOS_SUCCESSFUL) return 0; - /* TODO walk DVSEC to find component register base */ + /* The driver doesn't rely on component registers for Root Ports yet. */ + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (!rc) + dev_info(&pdev->dev, "No component register block found\n"); + + creg = cxl_regmap_to_base(pdev, &map); + port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); + rc = cxl_add_dport(port, &pdev->dev, port_num, creg); if (rc) { ctx->error = rc; return rc; diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e37e23bf4355..0d63758e2605 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -5,6 +5,7 @@ #include #include #include +#include /** * DOC: cxl registers @@ -247,3 +248,58 @@ int cxl_map_device_regs(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL); + +static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, + struct cxl_register_map *map) +{ + map->block_offset = ((u64)reg_hi << 32) | + (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); +} + +/** + * cxl_find_regblock() - Locate register blocks by type + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * @map: Enumeration output, clobbered on error + * + * Return: 0 if register block enumerated, negative error code otherwise + * + * A CXL DVSEC may point to one or more register blocks, search for them + * by @type. + */ +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + u32 regloc_size, regblocks; + int regloc, i; + + map->block_offset = U64_MAX; + regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, + CXL_DVSEC_REG_LOCATOR); + if (!regloc) + return -ENXIO; + + pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); + regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + + regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; + regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + + for (i = 0; i < regblocks; i++, regloc += 8) { + u32 reg_lo, reg_hi; + + pci_read_config_dword(pdev, regloc, ®_lo); + pci_read_config_dword(pdev, regloc + 4, ®_hi); + + cxl_decode_regblock(reg_lo, reg_hi, map); + + if (map->reg_type == type) + return 0; + } + + map->block_offset = U64_MAX; + return -ENODEV; +} +EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index a5a0be3f088b..6288a6c1fc5c 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -145,6 +145,10 @@ int cxl_map_device_regs(struct pci_dev *pdev, struct cxl_device_regs *regs, struct cxl_register_map *map); +enum cxl_regloc_type; +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); + #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0981d8f375ad..7d5f0c873aba 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -367,58 +367,6 @@ static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *ma return 0; } -static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, - struct cxl_register_map *map) -{ - map->block_offset = ((u64)reg_hi << 32) | - (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); - map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); - map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); -} - -/** - * cxl_find_regblock() - Locate register blocks by type - * @pdev: The CXL PCI device to enumerate. - * @type: Register Block Indicator id - * @map: Enumeration output, clobbered on error - * - * Return: 0 if register block enumerated, negative error code otherwise - * - * A CXL DVSEC may point to one or more register blocks, search for them - * by @type. - */ -static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - u32 regloc_size, regblocks; - int regloc, i; - - regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, - CXL_DVSEC_REG_LOCATOR); - if (!regloc) - return -ENXIO; - - pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); - - regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; - regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; - - for (i = 0; i < regblocks; i++, regloc += 8) { - u32 reg_lo, reg_hi; - - pci_read_config_dword(pdev, regloc, ®_lo); - pci_read_config_dword(pdev, regloc + 4, ®_hi); - - cxl_decode_regblock(reg_lo, reg_hi, map); - - if (map->reg_type == type) - return 0; - } - - return -ENODEV; -} - static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map) { diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h index 8ae2b4adc59d..0623bb85f30a 100644 --- a/drivers/cxl/pci.h +++ b/drivers/cxl/pci.h @@ -47,4 +47,13 @@ enum cxl_regloc_type { CXL_REGLOC_RBI_TYPES }; +static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, + struct cxl_register_map *map) +{ + if (map->block_offset == U64_MAX) + return CXL_RESOURCE_NONE; + + return pci_resource_start(pdev, map->barno) + map->block_offset; +} + #endif /* __CXL_PCI_H__ */ From c57cae78bfa6a8535d4baade451107b0577c2750 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:29:15 -0800 Subject: [PATCH 07/48] cxl: Introduce module_cxl_driver Many CXL drivers simply want to register and unregister themselves. module_driver already supported this. A simple wrapper around that reduces a decent amount of boilerplate in upcoming patches. Suggested-by: Dan Williams Reviewed-by: Dan Williams Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298415591.3018233.13608495220547681412.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 6288a6c1fc5c..38779409a419 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -308,6 +308,9 @@ int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, #define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME) void cxl_driver_unregister(struct cxl_driver *cxl_drv); +#define module_cxl_driver(__cxl_driver) \ + module_driver(__cxl_driver, cxl_driver_register, cxl_driver_unregister) + #define CXL_DEVICE_NVDIMM_BRIDGE 1 #define CXL_DEVICE_NVDIMM 2 From 0ff0af18216436d0151af4e410400c7a19ca9437 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:29:21 -0800 Subject: [PATCH 08/48] cxl/core/port: Rename bus.c to port.c Given it is dominated by port infrastructure, and will only acquire more, rename bus.c to port.c. Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164298416136.3018233.15442880970000855425.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- Documentation/driver-api/cxl/memory-devices.rst | 4 ++-- drivers/cxl/core/Makefile | 2 +- drivers/cxl/core/{bus.c => port.c} | 0 tools/testing/cxl/Kbuild | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename drivers/cxl/core/{bus.c => port.c} (100%) diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index 3b8f41395f6b..c8f7a16cd0e3 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -36,10 +36,10 @@ CXL Core .. kernel-doc:: drivers/cxl/cxl.h :internal: -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :doc: cxl core -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :identifiers: .. kernel-doc:: drivers/cxl/core/pmem.c diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 40ab50318daf..a90202ac88d2 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o ccflags-y += -I$(srctree)/drivers/cxl -cxl_core-y := bus.o +cxl_core-y := port.o cxl_core-y += pmem.o cxl_core-y += regs.o cxl_core-y += memdev.o diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/port.c similarity index 100% rename from drivers/cxl/core/bus.c rename to drivers/cxl/core/port.c diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 1acdf2fc31c5..3299fb0977b2 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -25,7 +25,7 @@ cxl_pmem-y += config_check.o obj-m += cxl_core.o -cxl_core-y := $(CXL_CORE_SRC)/bus.o +cxl_core-y := $(CXL_CORE_SRC)/port.o cxl_core-y += $(CXL_CORE_SRC)/pmem.o cxl_core-y += $(CXL_CORE_SRC)/regs.o cxl_core-y += $(CXL_CORE_SRC)/memdev.o From c3bca8d4bb3ff77b8784cdc794eb1f8f89b10fb5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:29:26 -0800 Subject: [PATCH 09/48] cxl/decoder: Hide physical address information from non-root Just like /proc/iomem, CXL physical address information is reserved for root only. Reviewed-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298416650.3018233.450720006145238709.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 3f9b98ecd18b..c5e74c6f04e8 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -49,7 +49,7 @@ static ssize_t start_show(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%#llx\n", cxld->range.start); } -static DEVICE_ATTR_RO(start); +static DEVICE_ATTR_ADMIN_RO(start); static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) From 608135db1b790170d22848815c4671407af74e37 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:29:31 -0800 Subject: [PATCH 10/48] cxl/core: Convert decoder range to resource CXL decoders manage address ranges in a hierarchical fashion whereby a leaf is a unique subregion of its parent decoder (midlevel or root). It therefore makes sense to use the resource API for handling this. Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron (v1) Signed-off-by: Ben Widawsky Link: https://lore.kernel.org/r/164298417191.3018233.5201055578165414714.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 22 ++++++++-------------- drivers/cxl/core/port.c | 23 +++++++++++++++++++++-- drivers/cxl/cxl.h | 8 ++++++-- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index c656a49a11a9..da70f1836db6 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -108,10 +108,8 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); cxld->target_type = CXL_DECODER_EXPANDER; - cxld->range = (struct range){ - .start = cfmws->base_hpa, - .end = cfmws->base_hpa + cfmws->window_size - 1, - }; + cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa, + cfmws->window_size); cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws); cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws); @@ -121,14 +119,13 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, else rc = cxl_decoder_autoremove(dev, cxld); if (rc) { - dev_err(dev, "Failed to add decoder for %#llx-%#llx\n", - cfmws->base_hpa, - cfmws->base_hpa + cfmws->window_size - 1); + dev_err(dev, "Failed to add decoder for %pr\n", + &cxld->platform_res); return 0; } - dev_dbg(dev, "add: %s node: %d range %#llx-%#llx\n", - dev_name(&cxld->dev), phys_to_target_node(cxld->range.start), - cfmws->base_hpa, cfmws->base_hpa + cfmws->window_size - 1); + dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev), + phys_to_target_node(cxld->platform_res.start), + &cxld->platform_res); return 0; } @@ -270,10 +267,7 @@ static int add_host_bridge_uport(struct device *match, void *arg) cxld->interleave_ways = 1; cxld->interleave_granularity = PAGE_SIZE; cxld->target_type = CXL_DECODER_EXPANDER; - cxld->range = (struct range) { - .start = 0, - .end = -1, - }; + cxld->platform_res = (struct resource)DEFINE_RES_MEM(0, 0); device_lock(&port->dev); dport = list_first_entry(&port->dports, typeof(*dport), list); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c5e74c6f04e8..63c76cb2a2ec 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -46,8 +46,14 @@ static ssize_t start_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_decoder *cxld = to_cxl_decoder(dev); + u64 start; - return sysfs_emit(buf, "%#llx\n", cxld->range.start); + if (is_root_decoder(dev)) + start = cxld->platform_res.start; + else + start = cxld->decoder_range.start; + + return sysfs_emit(buf, "%#llx\n", start); } static DEVICE_ATTR_ADMIN_RO(start); @@ -55,8 +61,14 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cxl_decoder *cxld = to_cxl_decoder(dev); + u64 size; - return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range)); + if (is_root_decoder(dev)) + size = resource_size(&cxld->platform_res); + else + size = range_len(&cxld->decoder_range); + + return sysfs_emit(buf, "%#llx\n", size); } static DEVICE_ATTR_RO(size); @@ -546,6 +558,13 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) if (rc) return rc; + /* + * Platform decoder resources should show up with a reasonable name. All + * other resources are just sub ranges within the main decoder resource. + */ + if (is_root_decoder(dev)) + cxld->platform_res.name = dev_name(dev); + return device_add(dev); } EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 38779409a419..bfd95acea66c 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -179,7 +179,8 @@ enum cxl_decoder_type { * struct cxl_decoder - CXL address range decode configuration * @dev: this decoder's device * @id: kernel device name id - * @range: address range considered by this decoder + * @platform_res: address space resources considered by root decoder + * @decoder_range: address space resources considered by midlevel decoder * @interleave_ways: number of cxl_dports in this decode * @interleave_granularity: data stride per dport * @target_type: accelerator vs expander (type2 vs type3) selector @@ -190,7 +191,10 @@ enum cxl_decoder_type { struct cxl_decoder { struct device dev; int id; - struct range range; + union { + struct resource platform_res; + struct range decoder_range; + }; int interleave_ways; int interleave_granularity; enum cxl_decoder_type target_type; From d54c1bbe2d34e301382968d8b05bd8162e8f60fb Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Mon, 31 Jan 2022 13:33:13 -0800 Subject: [PATCH 11/48] cxl/core/port: Clarify decoder creation Add wrappers for the creation of decoder objects at the root level and switch level, and keep the core helper private to cxl/core/port.c. Root decoders are static descriptors conveyed from platform firmware (e.g. ACPI CFMWS). Switch decoders are CXL standard decoders enumerated via the HDM decoder capability structure. The base address for the HDM decoder capability structure may be conveyed either by PCIe or platform firmware (ACPI CEDT.CHBS). Additionally, the kdoc descriptions for these helpers and their dependencies is updated. Signed-off-by: Ben Widawsky [djbw: fixup changelog, clarify kdoc] Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164366463014.111117.9714595404002687111.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 4 +- drivers/cxl/core/port.c | 83 +++++++++++++++++++++++++++++++++++++---- drivers/cxl/cxl.h | 16 +++++++- 3 files changed, 92 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index da70f1836db6..0b267eabb15e 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -102,7 +102,7 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++) target_map[i] = cfmws->interleave_targets[i]; - cxld = cxl_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); + cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); if (IS_ERR(cxld)) return 0; @@ -260,7 +260,7 @@ static int add_host_bridge_uport(struct device *match, void *arg) * dport. Disable the range until the first CXL region is enumerated / * activated. */ - cxld = cxl_decoder_alloc(port, 1); + cxld = cxl_switch_decoder_alloc(port, 1); if (IS_ERR(cxld)) return PTR_ERR(cxld); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 63c76cb2a2ec..88ffec71464a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -495,13 +495,26 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, return rc; } -struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets) +/** + * cxl_decoder_alloc - Allocate a new CXL decoder + * @port: owning port of this decoder + * @nr_targets: downstream targets accessible by this decoder. All upstream + * ports and root ports must have at least 1 target. + * + * A port should contain one or more decoders. Each of those decoders enable + * some address space for CXL.mem utilization. A decoder is expected to be + * configured by the caller before registering. + * + * Return: A new cxl decoder to be registered by cxl_decoder_add() + */ +static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) { struct cxl_decoder *cxld; struct device *dev; int rc = 0; - if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1) + if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets == 0) return ERR_PTR(-EINVAL); cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL); @@ -519,20 +532,74 @@ struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets) device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; - - /* root ports do not have a cxl_port_type parent */ - if (port->dev.parent->type == &cxl_port_type) - dev->type = &cxl_decoder_switch_type; + if (is_cxl_root(port)) + cxld->dev.type = &cxl_decoder_root_type; else - dev->type = &cxl_decoder_root_type; + cxld->dev.type = &cxl_decoder_switch_type; return cxld; err: kfree(cxld); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(cxl_decoder_alloc, CXL); +/** + * cxl_root_decoder_alloc - Allocate a root level decoder + * @port: owning CXL root of this decoder + * @nr_targets: static number of downstream targets + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). A + * 'CXL root' decoder is one that decodes from a top-level / static platform + * firmware description of CXL resources into a CXL standard decode + * topology. + */ +struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + if (!is_cxl_root(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, nr_targets); +} +EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL); + +/** + * cxl_switch_decoder_alloc - Allocate a switch level decoder + * @port: owning CXL switch port of this decoder + * @nr_targets: max number of dynamically addressable downstream targets + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). A + * 'switch' decoder is any decoder that can be enumerated by PCIe + * topology and the HDM Decoder Capability. This includes the decoders + * that sit between Switch Upstream Ports / Switch Downstream Ports and + * Host Bridges / Root Ports. + */ +struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + if (is_cxl_root(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, nr_targets); +} +EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL); + +/** + * cxl_decoder_add - Add a decoder with targets + * @cxld: The cxl decoder allocated by cxl_decoder_alloc() + * @target_map: A list of downstream ports that this decoder can direct memory + * traffic to. These numbers should correspond with the port number + * in the PCIe Link Capabilities structure. + * + * Certain types of decoders may not have any targets. The main example of this + * is an endpoint device. A more awkward example is a hostbridge whose root + * ports get hot added (technically possible, though unlikely). + * + * Context: Process context. Takes and releases the cxld's device lock. + * + * Return: Negative error code if the decoder wasn't properly configured; else + * returns 0. + */ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) { struct cxl_port *port; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index bfd95acea66c..621a70e023c1 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -278,6 +278,17 @@ struct cxl_dport { struct list_head list; }; +/* + * The platform firmware device hosting the root is also the top of the + * CXL port topology. All other CXL ports have another CXL port as their + * parent and their ->uport / host device is out-of-line of the port + * ancestry. + */ +static inline bool is_cxl_root(struct cxl_port *port) +{ + return port->uport == port->dev.parent; +} + struct cxl_port *to_cxl_port(struct device *dev); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, @@ -288,7 +299,10 @@ int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); -struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets); +struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets); +struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); From d621bc2e7282f9955033a6359877fd4ac4be60e1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:29:42 -0800 Subject: [PATCH 12/48] cxl/core: Fix cxl_probe_component_regs() error message Fix a '\n' vs '/n' typo. Fixes: 08422378c4ad ("cxl/pci: Add HDM decoder capabilities") Acked-by: Ben Widawsky Link: https://lore.kernel.org/r/164298418268.3018233.17790073375430834911.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 0d63758e2605..12a6cbddf110 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -50,7 +50,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { dev_err(dev, - "Couldn't locate the CXL.cache and CXL.mem capability array header./n"); + "Couldn't locate the CXL.cache and CXL.mem capability array header.\n"); return; } From d2b61ed2ff63fd9f294db8399c7a680ea7fe8a23 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:29:47 -0800 Subject: [PATCH 13/48] cxl/core/port: Make passthrough decoder init implicit Unused CXL decoders, or ports which use a passthrough decoder (no HDM decoder registers) are expected to be initialized in a specific way. Since upcoming drivers will want the same initialization, and it was already a requirement to have consumers of the API configure the decoder specific to their needs, initialize to this passthrough state by default. Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298418778.3018233.13573986275832546547.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 5 ----- drivers/cxl/core/port.c | 9 ++++++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 0b267eabb15e..4e8086525edc 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -264,11 +264,6 @@ static int add_host_bridge_uport(struct device *match, void *arg) if (IS_ERR(cxld)) return PTR_ERR(cxld); - cxld->interleave_ways = 1; - cxld->interleave_granularity = PAGE_SIZE; - cxld->target_type = CXL_DECODER_EXPANDER; - cxld->platform_res = (struct resource)DEFINE_RES_MEM(0, 0); - device_lock(&port->dev); dport = list_first_entry(&port->dports, typeof(*dport), list); device_unlock(&port->dev); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 88ffec71464a..73ff42a05473 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -505,7 +505,8 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, * some address space for CXL.mem utilization. A decoder is expected to be * configured by the caller before registering. * - * Return: A new cxl decoder to be registered by cxl_decoder_add() + * Return: A new cxl decoder to be registered by cxl_decoder_add(). The decoder + * is initialized to be a "passthrough" decoder. */ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, unsigned int nr_targets) @@ -537,6 +538,12 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, else cxld->dev.type = &cxl_decoder_switch_type; + /* Pre initialize an "empty" decoder */ + cxld->interleave_ways = 1; + cxld->interleave_granularity = PAGE_SIZE; + cxld->target_type = CXL_DECODER_EXPANDER; + cxld->platform_res = (struct resource)DEFINE_RES_MEM(0, 0); + return cxld; err: kfree(cxld); From 53fa1bff3426344d466d91e81f076eab677d0ece Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:29:53 -0800 Subject: [PATCH 14/48] cxl/core: Track port depth In preparation for proving CXL subsystem usage of the device_lock() order track the depth of ports with the expectation that shallower port locks can be held over deeper port locks. Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298419321.3018233.4469731547378993606.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 2 ++ drivers/cxl/cxl.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 73ff42a05473..f287d87da6d6 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -362,6 +362,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, if (IS_ERR(port)) return port; + if (parent_port) + port->depth = parent_port->depth + 1; dev = &port->dev; if (parent_port) rc = dev_set_name(dev, "port%d", port->id); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 621a70e023c1..7ade555076bc 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -252,6 +252,7 @@ struct cxl_walk_context { * @dports: cxl_dport instances referenced by decoders * @decoder_ida: allocator for decoder ids * @component_reg_phys: component register capability base address (optional) + * @depth: How deep this port is relative to the root. depth 0 is the root. */ struct cxl_port { struct device dev; @@ -260,6 +261,7 @@ struct cxl_port { struct list_head dports; struct ida decoder_ida; resource_size_t component_reg_phys; + unsigned int depth; }; /** From 3c5b903955251ea464fca383a42d981e33004df6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 31 Jan 2022 11:50:09 -0800 Subject: [PATCH 15/48] cxl: Prove CXL locking When CONFIG_PROVE_LOCKING is enabled the 'struct device' definition gets an additional mutex that is not clobbered by lockdep_set_novalidate_class() like the typical device_lock(). This allows for local annotation of subsystem locks with mutex_lock_nested() per the subsystem's object/lock hierarchy. For CXL, this primarily needs the ability to lock ports by depth and child objects of ports by their parent parent-port lock. Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164365853422.99383.1052399160445197427.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 10 ++--- drivers/cxl/core/pmem.c | 4 +- drivers/cxl/core/port.c | 47 +++++++++++++++++------ drivers/cxl/cxl.h | 81 ++++++++++++++++++++++++++++++++++++++++ drivers/cxl/pmem.c | 12 +++--- drivers/nvdimm/nd-core.h | 2 +- lib/Kconfig.debug | 23 ++++++++++++ 7 files changed, 154 insertions(+), 25 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 4e8086525edc..93d1dc56892a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -176,14 +176,14 @@ static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device { struct cxl_dport *dport; - device_lock(&port->dev); + cxl_device_lock(&port->dev); list_for_each_entry(dport, &port->dports, list) if (dport->dport == dev) { - device_unlock(&port->dev); + cxl_device_unlock(&port->dev); return dport; } - device_unlock(&port->dev); + cxl_device_unlock(&port->dev); return NULL; } @@ -264,9 +264,9 @@ static int add_host_bridge_uport(struct device *match, void *arg) if (IS_ERR(cxld)) return PTR_ERR(cxld); - device_lock(&port->dev); + cxl_device_lock(&port->dev); dport = list_first_entry(&port->dports, typeof(*dport), list); - device_unlock(&port->dev); + cxl_device_unlock(&port->dev); single_port_map[0] = dport->port_id; diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index b5fca97b0a07..40b3f5030496 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -115,10 +115,10 @@ static void unregister_nvb(void *_cxl_nvb) * work to flush. Once the state has been changed to 'dead' then no new * work can be queued by user-triggered bind. */ - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); flush = cxl_nvb->state != CXL_NVB_NEW; cxl_nvb->state = CXL_NVB_DEAD; - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); /* * Even though the device core will trigger device_release_driver() diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index f287d87da6d6..9285cdb734b2 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -111,7 +111,7 @@ static ssize_t target_list_show(struct device *dev, ssize_t offset = 0; int i, rc = 0; - device_lock(dev); + cxl_device_lock(dev); for (i = 0; i < cxld->interleave_ways; i++) { struct cxl_dport *dport = cxld->target[i]; struct cxl_dport *next = NULL; @@ -127,7 +127,7 @@ static ssize_t target_list_show(struct device *dev, break; offset += rc; } - device_unlock(dev); + cxl_device_unlock(dev); if (rc < 0) return rc; @@ -214,6 +214,12 @@ bool is_root_decoder(struct device *dev) } EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL); +bool is_cxl_decoder(struct device *dev) +{ + return dev->type->release == cxl_decoder_release; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_decoder, CXL); + struct cxl_decoder *to_cxl_decoder(struct device *dev) { if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release, @@ -235,10 +241,10 @@ static void cxl_port_release(struct device *dev) struct cxl_port *port = to_cxl_port(dev); struct cxl_dport *dport, *_d; - device_lock(dev); + cxl_device_lock(dev); list_for_each_entry_safe(dport, _d, &port->dports, list) cxl_dport_release(dport); - device_unlock(dev); + cxl_device_unlock(dev); ida_free(&cxl_port_ida, port->id); kfree(port); } @@ -254,6 +260,12 @@ static const struct device_type cxl_port_type = { .groups = cxl_port_attribute_groups, }; +bool is_cxl_port(struct device *dev) +{ + return dev->type == &cxl_port_type; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_port, CXL); + struct cxl_port *to_cxl_port(struct device *dev) { if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type, @@ -261,13 +273,14 @@ struct cxl_port *to_cxl_port(struct device *dev) return NULL; return container_of(dev, struct cxl_port, dev); } +EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL); static void unregister_port(void *_port) { struct cxl_port *port = _port; struct cxl_dport *dport; - device_lock(&port->dev); + cxl_device_lock(&port->dev); list_for_each_entry(dport, &port->dports, list) { char link_name[CXL_TARGET_STRLEN]; @@ -276,7 +289,7 @@ static void unregister_port(void *_port) continue; sysfs_remove_link(&port->dev.kobj, link_name); } - device_unlock(&port->dev); + cxl_device_unlock(&port->dev); device_unregister(&port->dev); } @@ -407,7 +420,7 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) { struct cxl_dport *dup; - device_lock(&port->dev); + cxl_device_lock(&port->dev); dup = find_dport(port, new->port_id); if (dup) dev_err(&port->dev, @@ -416,7 +429,7 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) dev_name(dup->dport)); else list_add_tail(&new->list, &port->dports); - device_unlock(&port->dev); + cxl_device_unlock(&port->dev); return dup ? -EEXIST : 0; } @@ -475,7 +488,7 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, if (!target_map) return 0; - device_lock(&port->dev); + cxl_device_lock(&port->dev); if (list_empty(&port->dports)) { rc = -EINVAL; goto out_unlock; @@ -492,7 +505,7 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, } out_unlock: - device_unlock(&port->dev); + cxl_device_unlock(&port->dev); return rc; } @@ -717,15 +730,27 @@ static int cxl_bus_match(struct device *dev, struct device_driver *drv) static int cxl_bus_probe(struct device *dev) { - return to_cxl_drv(dev->driver)->probe(dev); + int rc; + + /* + * Take the CXL nested lock since the driver core only holds + * @dev->mutex and not @dev->lockdep_mutex. + */ + cxl_nested_lock(dev); + rc = to_cxl_drv(dev->driver)->probe(dev); + cxl_nested_unlock(dev); + + return rc; } static void cxl_bus_remove(struct device *dev) { struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); + cxl_nested_lock(dev); if (cxl_drv->remove) cxl_drv->remove(dev); + cxl_nested_unlock(dev); } struct bus_type cxl_bus_type = { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 7ade555076bc..6a38d2e1f3dd 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -291,6 +291,7 @@ static inline bool is_cxl_root(struct cxl_port *port) return port->uport == port->dev.parent; } +bool is_cxl_port(struct device *dev); struct cxl_port *to_cxl_port(struct device *dev); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, @@ -301,6 +302,7 @@ int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); +bool is_cxl_decoder(struct device *dev); struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, @@ -353,4 +355,83 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd); #ifndef __mock #define __mock static #endif + +#ifdef CONFIG_PROVE_CXL_LOCKING +enum cxl_lock_class { + CXL_ANON_LOCK, + CXL_NVDIMM_LOCK, + CXL_NVDIMM_BRIDGE_LOCK, + CXL_PORT_LOCK, + /* + * Be careful to add new lock classes here, CXL_PORT_LOCK is + * extended by the port depth, so a maximum CXL port topology + * depth would need to be defined first. + */ +}; + +static inline void cxl_nested_lock(struct device *dev) +{ + if (is_cxl_port(dev)) { + struct cxl_port *port = to_cxl_port(dev); + + mutex_lock_nested(&dev->lockdep_mutex, + CXL_PORT_LOCK + port->depth); + } else if (is_cxl_decoder(dev)) { + struct cxl_port *port = to_cxl_port(dev->parent); + + /* + * A decoder is the immediate child of a port, so set + * its lock class equal to other child device siblings. + */ + mutex_lock_nested(&dev->lockdep_mutex, + CXL_PORT_LOCK + port->depth + 1); + } else if (is_cxl_nvdimm_bridge(dev)) + mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK); + else if (is_cxl_nvdimm(dev)) + mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK); + else + mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK); +} + +static inline void cxl_nested_unlock(struct device *dev) +{ + mutex_unlock(&dev->lockdep_mutex); +} + +static inline void cxl_device_lock(struct device *dev) +{ + /* + * For double lock errors the lockup will happen before lockdep + * warns at cxl_nested_lock(), so assert explicitly. + */ + lockdep_assert_not_held(&dev->lockdep_mutex); + + device_lock(dev); + cxl_nested_lock(dev); +} + +static inline void cxl_device_unlock(struct device *dev) +{ + cxl_nested_unlock(dev); + device_unlock(dev); +} +#else +static inline void cxl_nested_lock(struct device *dev) +{ +} + +static inline void cxl_nested_unlock(struct device *dev) +{ +} + +static inline void cxl_device_lock(struct device *dev) +{ + device_lock(dev); +} + +static inline void cxl_device_unlock(struct device *dev) +{ + device_unlock(dev); +} +#endif #endif /* __CXL_H__ */ diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index b65a272a2d6d..15ad666ab03e 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev) if (!cxl_nvb) return -ENXIO; - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); if (!cxl_nvb->nvdimm_bus) { rc = -ENXIO; goto out; @@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev) dev_set_drvdata(dev, nvdimm); rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; @@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work) struct nvdimm_bus *victim_bus = NULL; bool release = false, rescan = false; - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); switch (cxl_nvb->state) { case CXL_NVB_ONLINE: if (!online_nvdimm_bus(cxl_nvb)) { @@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work) default: break; } - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); if (release) device_release_driver(&cxl_nvb->dev); @@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) return 0; cxl_nvb = to_cxl_nvdimm_bridge(dev); - device_lock(dev); + cxl_device_lock(dev); cxl_nvb->state = CXL_NVB_NEW; - device_unlock(dev); + cxl_device_unlock(dev); return 0; } diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index a11850dd475d..2650a852eeaf 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -185,7 +185,7 @@ static inline void devm_nsio_disable(struct device *dev, } #endif -#ifdef CONFIG_PROVE_LOCKING +#ifdef CONFIG_PROVE_NVDIMM_LOCKING extern struct class *nd_class; enum { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 14b89aa37c5c..7dea203964f7 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1516,6 +1516,29 @@ config CSD_LOCK_WAIT_DEBUG include the IPI handler function currently executing (if any) and relevant stack traces. +choice + prompt "Lock debugging: prove subsystem device_lock() correctness" + depends on PROVE_LOCKING + help + For subsystems that have instrumented their usage of the device_lock() + with nested annotations, enable lock dependency checking. The locking + hierarchy 'subclass' identifiers are not compatible across + sub-systems, so only one can be enabled at a time. + +config PROVE_NVDIMM_LOCKING + bool "NVDIMM" + depends on LIBNVDIMM + help + Enable lockdep to validate nd_device_lock() usage. + +config PROVE_CXL_LOCKING + bool "CXL" + depends on CXL_BUS + help + Enable lockdep to validate cxl_device_lock() usage. + +endchoice + endmenu # lock debugging config TRACE_IRQFLAGS From 86c8ea0f3b32aae6d824bdc0d835b6a9361dc912 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 31 Jan 2022 15:35:18 -0800 Subject: [PATCH 16/48] cxl/core/port: Use dedicated lock for decoder target list Lockdep reports: ====================================================== WARNING: possible circular locking dependency detected 5.16.0-rc1+ #142 Tainted: G OE ------------------------------------------------------ cxl/1220 is trying to acquire lock: ffff979b85475460 (kn->active#144){++++}-{0:0}, at: __kernfs_remove+0x1ab/0x1e0 but task is already holding lock: ffff979b87ab38e8 (&dev->lockdep_mutex#2/4){+.+.}-{3:3}, at: cxl_remove_ep+0x50c/0x5c0 [cxl_core] ...where cxl_remove_ep() is a helper that wants to delete ports while holding a lock on the host device for that port. That sets up a lockdep violation whereby target_list_show() can not rely holding the decoder's device lock while walking the target_list. Switch to a dedicated seqlock for this purpose. Reported-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164367209095.208169.1171673319121271280.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 30 +++++++++++++++++++++++------- drivers/cxl/cxl.h | 2 ++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 9285cdb734b2..fc5d86222bc3 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -104,14 +104,11 @@ static ssize_t target_type_show(struct device *dev, } static DEVICE_ATTR_RO(target_type); -static ssize_t target_list_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t emit_target_list(struct cxl_decoder *cxld, char *buf) { - struct cxl_decoder *cxld = to_cxl_decoder(dev); ssize_t offset = 0; int i, rc = 0; - cxl_device_lock(dev); for (i = 0; i < cxld->interleave_ways; i++) { struct cxl_dport *dport = cxld->target[i]; struct cxl_dport *next = NULL; @@ -124,13 +121,29 @@ static ssize_t target_list_show(struct device *dev, rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id, next ? "," : ""); if (rc < 0) - break; + return rc; offset += rc; } - cxl_device_unlock(dev); + + return offset; +} + +static ssize_t target_list_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + ssize_t offset; + unsigned int seq; + int rc; + + do { + seq = read_seqbegin(&cxld->target_lock); + rc = emit_target_list(cxld, buf); + } while (read_seqretry(&cxld->target_lock, seq)); if (rc < 0) return rc; + offset = rc; rc = sysfs_emit_at(buf, offset, "\n"); if (rc < 0) @@ -494,15 +507,17 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, goto out_unlock; } + write_seqlock(&cxld->target_lock); for (i = 0; i < cxld->nr_targets; i++) { struct cxl_dport *dport = find_dport(port, target_map[i]); if (!dport) { rc = -ENXIO; - goto out_unlock; + break; } cxld->target[i] = dport; } + write_sequnlock(&cxld->target_lock); out_unlock: cxl_device_unlock(&port->dev); @@ -543,6 +558,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, cxld->id = rc; cxld->nr_targets = nr_targets; + seqlock_init(&cxld->target_lock); dev = &cxld->dev; device_initialize(dev); device_set_pm_not_required(dev); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 6a38d2e1f3dd..e79162999088 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -185,6 +185,7 @@ enum cxl_decoder_type { * @interleave_granularity: data stride per dport * @target_type: accelerator vs expander (type2 vs type3) selector * @flags: memory type capabilities and locking + * @target_lock: coordinate coherent reads of the target list * @nr_targets: number of elements in @target * @target: active ordered target list in current decoder configuration */ @@ -199,6 +200,7 @@ struct cxl_decoder { int interleave_granularity; enum cxl_decoder_type target_type; unsigned long flags; + seqlock_t target_lock; int nr_targets; struct cxl_dport *target[]; }; From 5ff7316f6fea4798c66b1ba953d1ebe6617503e4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 31 Jan 2022 08:44:52 -0800 Subject: [PATCH 17/48] cxl/port: Introduce cxl_port_to_pci_bus() Add a helper for converting a PCI enumerated cxl_port into the pci_bus that hosts its dports. For switch ports this is trivial, but for root ports there is no generic way to go from a platform defined host bridge device, like ACPI0016 to its corresponding pci_bus. Rather than spill ACPI goop outside of the cxl_acpi driver, just arrange for it to register an xarray translation from the uport device to the corresponding pci_bus. This is in preparation for centralizing dport enumeration in the core. Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164364745633.85488.9744017377155103992.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 14 +++++++++----- drivers/cxl/core/port.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 3 +++ 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 93d1dc56892a..ab2b76532272 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -225,17 +225,21 @@ static int add_host_bridge_uport(struct device *match, void *arg) return 0; } + /* + * Note that this lookup already succeeded in + * to_cxl_host_bridge(), so no need to check for failure here + */ + pci_root = acpi_pci_find_root(bridge->handle); + rc = devm_cxl_register_pci_bus(host, match, pci_root->bus); + if (rc) + return rc; + port = devm_cxl_add_port(host, match, dport->component_reg_phys, root_port); if (IS_ERR(port)) return PTR_ERR(port); dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); - /* - * Note that this lookup already succeeded in - * to_cxl_host_bridge(), so no need to check for failure here - */ - pci_root = acpi_pci_find_root(bridge->handle); ctx = (struct cxl_walk_context){ .dev = host, .root = pci_root->bus, diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index fc5d86222bc3..2a4230d685d5 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -25,6 +25,7 @@ */ static DEFINE_IDA(cxl_port_ida); +static DEFINE_XARRAY(cxl_root_buses); static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -418,6 +419,42 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); +struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port) +{ + /* There is no pci_bus associated with a CXL platform-root port */ + if (is_cxl_root(port)) + return NULL; + + if (dev_is_pci(port->uport)) { + struct pci_dev *pdev = to_pci_dev(port->uport); + + return pdev->subordinate; + } + + return xa_load(&cxl_root_buses, (unsigned long)port->uport); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_to_pci_bus, CXL); + +static void unregister_pci_bus(void *uport) +{ + xa_erase(&cxl_root_buses, (unsigned long)uport); +} + +int devm_cxl_register_pci_bus(struct device *host, struct device *uport, + struct pci_bus *bus) +{ + int rc; + + if (dev_is_pci(uport)) + return -EINVAL; + + rc = xa_insert(&cxl_root_buses, (unsigned long)uport, bus, GFP_KERNEL); + if (rc) + return rc; + return devm_add_action_or_reset(host, unregister_pci_bus, uport); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL); + static struct cxl_dport *find_dport(struct cxl_port *port, int id) { struct cxl_dport *dport; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index e79162999088..4d4cc8292137 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -295,6 +295,9 @@ static inline bool is_cxl_root(struct cxl_port *port) bool is_cxl_port(struct device *dev); struct cxl_port *to_cxl_port(struct device *dev); +int devm_cxl_register_pci_bus(struct device *host, struct device *uport, + struct pci_bus *bus); +struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port); From a46cfc0f011ce77d120e1cdbf973f733d18f0105 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 31 Jan 2022 16:34:40 -0800 Subject: [PATCH 18/48] cxl/pmem: Introduce a find_cxl_root() helper In preparation for switch port enumeration while also preserving the potential for multi-domain / multi-root CXL topologies. Introduce a 'struct device' generic mechanism for retrieving a root CXL port, if one is registered. Note that the only known multi-domain CXL configurations are running the cxl_test unit test on a system that also publishes an ACPI0017 device. With this in hand the nvdimm-bridge lookup can be with device_find_child() instead of bus_find_device() + custom mocked lookup infrastructure in cxl_test. The mechanism looks for a 2nd level port since the root level topology is platform-firmware specific and the 2nd level down follows standard PCIe topology expectations. The cxl_acpi 2nd level is associated with a PCIe Root Port. Reported-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164367562182.225521.9488555616768096049.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 14 +++++++--- drivers/cxl/core/port.c | 49 +++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 1 + tools/testing/cxl/Kbuild | 2 -- tools/testing/cxl/mock_pmem.c | 24 ----------------- 5 files changed, 60 insertions(+), 30 deletions(-) delete mode 100644 tools/testing/cxl/mock_pmem.c diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 40b3f5030496..8de240c4d96b 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -57,24 +57,30 @@ bool is_cxl_nvdimm_bridge(struct device *dev) } EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL); -__mock int match_nvdimm_bridge(struct device *dev, const void *data) +static int match_nvdimm_bridge(struct device *dev, void *data) { return is_cxl_nvdimm_bridge(dev); } struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd) { + struct cxl_port *port = find_cxl_root(&cxl_nvd->dev); struct device *dev; - dev = bus_find_device(&cxl_bus_type, NULL, cxl_nvd, match_nvdimm_bridge); + if (!port) + return NULL; + + dev = device_find_child(&port->dev, NULL, match_nvdimm_bridge); + put_device(&port->dev); + if (!dev) return NULL; + return to_cxl_nvdimm_bridge(dev); } EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL); -static struct cxl_nvdimm_bridge * -cxl_nvdimm_bridge_alloc(struct cxl_port *port) +static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; struct device *dev; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2a4230d685d5..af7a515e4572 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -455,6 +455,55 @@ int devm_cxl_register_pci_bus(struct device *host, struct device *uport, } EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL); +/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */ +static int match_root_child(struct device *dev, const void *match) +{ + const struct device *iter = NULL; + struct cxl_port *port, *parent; + struct cxl_dport *dport; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + if (is_cxl_root(port)) + return 0; + + parent = to_cxl_port(port->dev.parent); + if (!is_cxl_root(parent)) + return 0; + + cxl_device_lock(&port->dev); + list_for_each_entry(dport, &port->dports, list) { + iter = match; + while (iter) { + if (iter == dport->dport) + goto out; + iter = iter->parent; + } + } +out: + cxl_device_unlock(&port->dev); + + return !!iter; +} + +struct cxl_port *find_cxl_root(struct device *dev) +{ + struct device *port_dev; + struct cxl_port *root; + + port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child); + if (!port_dev) + return NULL; + + root = to_cxl_port(port_dev->parent); + get_device(&root->dev); + put_device(port_dev); + return root; +} +EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL); + static struct cxl_dport *find_dport(struct cxl_port *port, int id) { struct cxl_dport *dport; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 4d4cc8292137..61b0db526fa2 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -304,6 +304,7 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, resource_size_t component_reg_phys); +struct cxl_port *find_cxl_root(struct device *dev); struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 3299fb0977b2..ddaee8a2c418 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -32,6 +32,4 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o cxl_core-y += $(CXL_CORE_SRC)/mbox.o cxl_core-y += config_check.o -cxl_core-y += mock_pmem.o - obj-m += test/ diff --git a/tools/testing/cxl/mock_pmem.c b/tools/testing/cxl/mock_pmem.c deleted file mode 100644 index f7315e6f52c0..000000000000 --- a/tools/testing/cxl/mock_pmem.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ -#include -#include "test/mock.h" -#include - -int match_nvdimm_bridge(struct device *dev, const void *data) -{ - int index, rc = 0; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - const struct cxl_nvdimm *cxl_nvd = data; - - if (ops) { - if (dev->type == &cxl_nvdimm_bridge_type && - (ops->is_mock_dev(dev->parent->parent) == - ops->is_mock_dev(cxl_nvd->dev.parent->parent))) - rc = 1; - } else - rc = dev->type == &cxl_nvdimm_bridge_type; - - put_cxl_mock_ops(index); - - return rc; -} From c978f1b10aba8ce4f8e1f6fcc86b174e08a6e7f7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 31 Jan 2022 17:07:38 -0800 Subject: [PATCH 19/48] cxl/port: Up-level cxl_add_dport() locking requirements to the caller In preparation for moving dport enumeration into the core, require the port device lock to be acquired by the caller. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164367759016.324231.105551648350470000.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 2 ++ drivers/cxl/core/port.c | 3 +-- tools/testing/cxl/mock_acpi.c | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index ab2b76532272..5d848b77d8e8 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -342,7 +342,9 @@ static int add_host_bridge_dport(struct device *match, void *arg) return 0; } + cxl_device_lock(&root_port->dev); rc = cxl_add_dport(root_port, match, uid, ctx.chbcr); + cxl_device_unlock(&root_port->dev); if (rc) { dev_err(host, "failed to add downstream port: %s\n", dev_name(match)); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index af7a515e4572..369cc52e0837 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -519,7 +519,7 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) { struct cxl_dport *dup; - cxl_device_lock(&port->dev); + device_lock_assert(&port->dev); dup = find_dport(port, new->port_id); if (dup) dev_err(&port->dev, @@ -528,7 +528,6 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) dev_name(dup->dport)); else list_add_tail(&new->list, &port->dports); - cxl_device_unlock(&port->dev); return dup ? -EEXIST : 0; } diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c index 4c8a493ace56..c953e3ab6494 100644 --- a/tools/testing/cxl/mock_acpi.c +++ b/tools/testing/cxl/mock_acpi.c @@ -57,7 +57,9 @@ static int match_add_root_port(struct pci_dev *pdev, void *data) /* TODO walk DVSEC to find component register base */ port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); + cxl_device_lock(&port->dev); rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); + cxl_device_unlock(&port->dev); if (rc) { dev_err(dev, "failed to add dport: %s (%d)\n", dev_name(&pdev->dev), rc); @@ -78,7 +80,9 @@ static int mock_add_root_port(struct platform_device *pdev, void *data) struct device *dev = ctx->dev; int rc; + cxl_device_lock(&port->dev); rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); + cxl_device_unlock(&port->dev); if (rc) { dev_err(dev, "failed to add dport: %s (%d)\n", dev_name(&pdev->dev), rc); From af9cae9facc2de773b4aa59916913cfd6e18bdd0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:30:25 -0800 Subject: [PATCH 20/48] cxl/pci: Rename pci.h to cxlpci.h Similar to the mem.h rename, if the core wants to reuse definitions from drivers/cxl/pci.h it is unable to use as that collides with archs that have an arch/$arch/include/asm/pci.h, like MIPS. Reported-by: kernel test robot Acked-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298422510.3018233.14693126572756675563.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 2 +- drivers/cxl/core/regs.c | 2 +- drivers/cxl/{pci.h => cxlpci.h} | 1 + drivers/cxl/pci.c | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) rename drivers/cxl/{pci.h => cxlpci.h} (99%) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 5d848b77d8e8..8d4fd7534e1e 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,8 +6,8 @@ #include #include #include +#include "cxlpci.h" #include "cxl.h" -#include "pci.h" /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ #define CFMWS_INTERLEAVE_WAYS(x) (1 << (x)->interleave_ways) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 12a6cbddf110..65d7f5880671 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include /** * DOC: cxl registers diff --git a/drivers/cxl/pci.h b/drivers/cxl/cxlpci.h similarity index 99% rename from drivers/cxl/pci.h rename to drivers/cxl/cxlpci.h index 0623bb85f30a..eb00f597a157 100644 --- a/drivers/cxl/pci.h +++ b/drivers/cxl/cxlpci.h @@ -2,6 +2,7 @@ /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ #ifndef __CXL_PCI_H__ #define __CXL_PCI_H__ +#include "cxl.h" #define CXL_MEMORY_PROGIF 0x10 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 7d5f0c873aba..8b435b875b65 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -10,7 +10,7 @@ #include #include #include "cxlmem.h" -#include "pci.h" +#include "cxlpci.h" #include "cxl.h" /** From 98d2d3a264543680281fd8a4e6ae490ca26b4f85 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 31 Jan 2022 18:10:04 -0800 Subject: [PATCH 21/48] cxl/core: Generalize dport enumeration in the core The core houses infrastructure for decoder resources. A CXL port's dports are more closely related to decoder infrastructure than topology enumeration. Implement generic PCI based dport enumeration in the core, i.e. arrange for existing root port enumeration from cxl_acpi to share code with switch port enumeration which just amounts to a small difference in a pci_walk_bus() invocation once the appropriate 'struct pci_bus' has been retrieved. Set the convention that decoder objects are registered after all dports are enumerated. This enables userspace to know when the CXL core is finished establishing 'dportX' links underneath the 'portX' object. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164368114191.354031.5270501846455462665.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 67 +++------------------- drivers/cxl/core/Makefile | 1 + drivers/cxl/core/pci.c | 101 ++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c | 91 +++++++++++++++++------------- drivers/cxl/cxl.h | 16 ++---- drivers/cxl/cxlpci.h | 1 + tools/testing/cxl/Kbuild | 3 +- tools/testing/cxl/mock_acpi.c | 78 -------------------------- tools/testing/cxl/test/cxl.c | 67 +++++++++++++++------- tools/testing/cxl/test/mock.c | 45 +++++++-------- tools/testing/cxl/test/mock.h | 6 +- 11 files changed, 238 insertions(+), 238 deletions(-) create mode 100644 drivers/cxl/core/pci.c diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 8d4fd7534e1e..259441245687 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -130,48 +130,6 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, return 0; } -__mock int match_add_root_ports(struct pci_dev *pdev, void *data) -{ - resource_size_t creg = CXL_RESOURCE_NONE; - struct cxl_walk_context *ctx = data; - struct pci_bus *root_bus = ctx->root; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct device *dev = ctx->dev; - struct cxl_register_map map; - u32 lnkcap, port_num; - int rc; - - if (pdev->bus != root_bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != PCI_EXP_TYPE_ROOT_PORT) - return 0; - if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, - &lnkcap) != PCIBIOS_SUCCESSFUL) - return 0; - - /* The driver doesn't rely on component registers for Root Ports yet. */ - rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); - if (!rc) - dev_info(&pdev->dev, "No component register block found\n"); - - creg = cxl_regmap_to_base(pdev, &map); - - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - rc = cxl_add_dport(port, &pdev->dev, port_num, creg); - if (rc) { - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); - - return 0; -} - static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev) { struct cxl_dport *dport; @@ -210,7 +168,6 @@ static int add_host_bridge_uport(struct device *match, void *arg) struct device *host = root_port->dev.parent; struct acpi_device *bridge = to_cxl_host_bridge(host, match); struct acpi_pci_root *pci_root; - struct cxl_walk_context ctx; int single_port_map[1], rc; struct cxl_decoder *cxld; struct cxl_dport *dport; @@ -240,18 +197,10 @@ static int add_host_bridge_uport(struct device *match, void *arg) return PTR_ERR(port); dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); - ctx = (struct cxl_walk_context){ - .dev = host, - .root = pci_root->bus, - .port = port, - }; - pci_walk_bus(pci_root->bus, match_add_root_ports, &ctx); - - if (ctx.count == 0) - return -ENODEV; - if (ctx.error) - return ctx.error; - if (ctx.count > 1) + rc = devm_cxl_port_enumerate_dports(host, port); + if (rc < 0) + return rc; + if (rc > 1) return 0; /* TODO: Scan CHBCR for HDM Decoder resources */ @@ -311,9 +260,9 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg, static int add_host_bridge_dport(struct device *match, void *arg) { - int rc; acpi_status status; unsigned long long uid; + struct cxl_dport *dport; struct cxl_chbs_context ctx; struct cxl_port *root_port = arg; struct device *host = root_port->dev.parent; @@ -343,12 +292,12 @@ static int add_host_bridge_dport(struct device *match, void *arg) } cxl_device_lock(&root_port->dev); - rc = cxl_add_dport(root_port, match, uid, ctx.chbcr); + dport = devm_cxl_add_dport(host, root_port, match, uid, ctx.chbcr); cxl_device_unlock(&root_port->dev); - if (rc) { + if (IS_ERR(dport)) { dev_err(host, "failed to add downstream port: %s\n", dev_name(match)); - return rc; + return PTR_ERR(dport); } dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match)); return 0; diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index a90202ac88d2..91057f0ec763 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -7,3 +7,4 @@ cxl_core-y += pmem.o cxl_core-y += regs.o cxl_core-y += memdev.o cxl_core-y += mbox.o +cxl_core-y += pci.o diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c new file mode 100644 index 000000000000..c5a9e03ed477 --- /dev/null +++ b/drivers/cxl/core/pci.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include "core.h" + +/** + * DOC: cxl core pci + * + * Compute Express Link protocols are layered on top of PCIe. CXL core provides + * a set of helpers for CXL interactions which occur via PCIe. + */ + +struct cxl_walk_context { + struct pci_bus *bus; + struct device *host; + struct cxl_port *port; + int type; + int error; + int count; +}; + +static int match_add_dports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + struct cxl_port *port = ctx->port; + int type = pci_pcie_type(pdev); + struct cxl_register_map map; + struct cxl_dport *dport; + u32 lnkcap, port_num; + int rc; + + if (pdev->bus != ctx->bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != ctx->type) + return 0; + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap)) + return 0; + + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + dev_dbg(&port->dev, "failed to find component registers\n"); + + port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); + cxl_device_lock(&port->dev); + dport = devm_cxl_add_dport(ctx->host, port, &pdev->dev, port_num, + cxl_regmap_to_base(pdev, &map)); + cxl_device_unlock(&port->dev); + if (IS_ERR(dport)) { + ctx->error = PTR_ERR(dport); + return PTR_ERR(dport); + } + ctx->count++; + + dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); + + return 0; +} + +/** + * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port + * @host: devm context + * @port: cxl_port whose ->uport is the upstream of dports to be enumerated + * + * Returns a positive number of dports enumerated or a negative error + * code. + */ +int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port) +{ + struct pci_bus *bus = cxl_port_to_pci_bus(port); + struct cxl_walk_context ctx; + int type; + + if (!bus) + return -ENXIO; + + if (pci_is_root_bus(bus)) + type = PCI_EXP_TYPE_ROOT_PORT; + else + type = PCI_EXP_TYPE_DOWNSTREAM; + + ctx = (struct cxl_walk_context) { + .host = host, + .port = port, + .bus = bus, + .type = type, + }; + pci_walk_bus(bus, match_add_dports, &ctx); + + if (ctx.count == 0) + return -ENODEV; + if (ctx.error) + return ctx.error; + return ctx.count; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 369cc52e0837..fee9c7affef4 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -243,22 +243,10 @@ struct cxl_decoder *to_cxl_decoder(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL); -static void cxl_dport_release(struct cxl_dport *dport) -{ - list_del(&dport->list); - put_device(dport->dport); - kfree(dport); -} - static void cxl_port_release(struct device *dev) { struct cxl_port *port = to_cxl_port(dev); - struct cxl_dport *dport, *_d; - cxl_device_lock(dev); - list_for_each_entry_safe(dport, _d, &port->dports, list) - cxl_dport_release(dport); - cxl_device_unlock(dev); ida_free(&cxl_port_ida, port->id); kfree(port); } @@ -292,18 +280,7 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL); static void unregister_port(void *_port) { struct cxl_port *port = _port; - struct cxl_dport *dport; - cxl_device_lock(&port->dev); - list_for_each_entry(dport, &port->dports, list) { - char link_name[CXL_TARGET_STRLEN]; - - if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", - dport->port_id) >= CXL_TARGET_STRLEN) - continue; - sysfs_remove_link(&port->dev.kobj, link_name); - } - cxl_device_unlock(&port->dev); device_unregister(&port->dev); } @@ -532,51 +509,87 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) return dup ? -EEXIST : 0; } +static void cxl_dport_remove(void *data) +{ + struct cxl_dport *dport = data; + struct cxl_port *port = dport->port; + + put_device(dport->dport); + cxl_device_lock(&port->dev); + list_del(&dport->list); + cxl_device_unlock(&port->dev); +} + +static void cxl_dport_unlink(void *data) +{ + struct cxl_dport *dport = data; + struct cxl_port *port = dport->port; + char link_name[CXL_TARGET_STRLEN]; + + sprintf(link_name, "dport%d", dport->port_id); + sysfs_remove_link(&port->dev.kobj, link_name); +} + /** - * cxl_add_dport - append downstream port data to a cxl_port + * devm_cxl_add_dport - append downstream port data to a cxl_port + * @host: devm context for allocations * @port: the cxl_port that references this dport * @dport_dev: firmware or PCI device representing the dport * @port_id: identifier for this dport in a decoder's target list * @component_reg_phys: optional location of CXL component registers * - * Note that all allocations and links are undone by cxl_port deletion - * and release. + * Note that dports are appended to the devm release action's of the + * either the port's host (for root ports), or the port itself (for + * switch ports) */ -int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id, - resource_size_t component_reg_phys) +struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port, + struct device *dport_dev, int port_id, + resource_size_t component_reg_phys) { char link_name[CXL_TARGET_STRLEN]; struct cxl_dport *dport; int rc; + if (!host->driver) { + dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n", + dev_name(dport_dev)); + return ERR_PTR(-ENXIO); + } + if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >= CXL_TARGET_STRLEN) - return -EINVAL; + return ERR_PTR(-EINVAL); - dport = kzalloc(sizeof(*dport), GFP_KERNEL); + dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL); if (!dport) - return -ENOMEM; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&dport->list); - dport->dport = get_device(dport_dev); + dport->dport = dport_dev; dport->port_id = port_id; dport->component_reg_phys = component_reg_phys; dport->port = port; rc = add_dport(port, dport); if (rc) - goto err; + return ERR_PTR(rc); + + get_device(dport_dev); + rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); + if (rc) + return ERR_PTR(rc); rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name); if (rc) - goto err; + return ERR_PTR(rc); - return 0; -err: - cxl_dport_release(dport); - return rc; + rc = devm_add_action_or_reset(host, cxl_dport_unlink, dport); + if (rc) + return ERR_PTR(rc); + + return dport; } -EXPORT_SYMBOL_NS_GPL(cxl_add_dport, CXL); +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL); static int decoder_populate_targets(struct cxl_decoder *cxld, struct cxl_port *port, int *target_map) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 61b0db526fa2..0754c68ccd33 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -236,14 +236,6 @@ struct cxl_nvdimm { struct nvdimm *nvdimm; }; -struct cxl_walk_context { - struct device *dev; - struct pci_bus *root; - struct cxl_port *port; - int error; - int count; -}; - /** * struct cxl_port - logical collection of upstream port devices and * downstream port devices to construct a CXL memory @@ -295,17 +287,17 @@ static inline bool is_cxl_root(struct cxl_port *port) bool is_cxl_port(struct device *dev); struct cxl_port *to_cxl_port(struct device *dev); +struct pci_bus; int devm_cxl_register_pci_bus(struct device *host, struct device *uport, struct pci_bus *bus); struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port); - -int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, - resource_size_t component_reg_phys); struct cxl_port *find_cxl_root(struct device *dev); - +struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port, + struct device *dport, int port_id, + resource_size_t component_reg_phys); struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); bool is_cxl_decoder(struct device *dev); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index eb00f597a157..103636fda198 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -57,4 +57,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, return pci_resource_start(pdev, map->barno) + map->block_offset; } +int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port); #endif /* __CXL_PCI_H__ */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index ddaee8a2c418..61123544aa49 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -3,8 +3,8 @@ ldflags-y += --wrap=acpi_table_parse_cedt ldflags-y += --wrap=is_acpi_device_node ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root -ldflags-y += --wrap=pci_walk_bus ldflags-y += --wrap=nvdimm_bus_register +ldflags-y += --wrap=devm_cxl_port_enumerate_dports DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -30,6 +30,7 @@ cxl_core-y += $(CXL_CORE_SRC)/pmem.o cxl_core-y += $(CXL_CORE_SRC)/regs.o cxl_core-y += $(CXL_CORE_SRC)/memdev.o cxl_core-y += $(CXL_CORE_SRC)/mbox.o +cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += config_check.o obj-m += test/ diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c index c953e3ab6494..55813de26d46 100644 --- a/tools/testing/cxl/mock_acpi.c +++ b/tools/testing/cxl/mock_acpi.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include "test/mock.h" @@ -34,80 +33,3 @@ struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) put_cxl_mock_ops(index); return found; } - -static int match_add_root_port(struct pci_dev *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct pci_bus *root_bus = ctx->root; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct device *dev = ctx->dev; - u32 lnkcap, port_num; - int rc; - - if (pdev->bus != root_bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != PCI_EXP_TYPE_ROOT_PORT) - return 0; - if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, - &lnkcap) != PCIBIOS_SUCCESSFUL) - return 0; - - /* TODO walk DVSEC to find component register base */ - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - cxl_device_lock(&port->dev); - rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); - cxl_device_unlock(&port->dev); - if (rc) { - dev_err(dev, "failed to add dport: %s (%d)\n", - dev_name(&pdev->dev), rc); - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); - - return 0; -} - -static int mock_add_root_port(struct platform_device *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct cxl_port *port = ctx->port; - struct device *dev = ctx->dev; - int rc; - - cxl_device_lock(&port->dev); - rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); - cxl_device_unlock(&port->dev); - if (rc) { - dev_err(dev, "failed to add dport: %s (%d)\n", - dev_name(&pdev->dev), rc); - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", pdev->id, dev_name(&pdev->dev)); - - return 0; -} - -int match_add_root_ports(struct pci_dev *dev, void *data) -{ - int index, rc; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - struct platform_device *pdev = (struct platform_device *) dev; - - if (ops && ops->is_mock_port(pdev)) - rc = mock_add_root_port(pdev, data); - else - rc = match_add_root_port(dev, data); - - put_cxl_mock_ops(index); - - return rc; -} diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 736d99006fb7..ef002e909d38 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -317,6 +317,19 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) if (dev == &cxl_host_bridge[i]->dev) return true; + return false; +} + +static bool is_mock_port(struct device *dev) +{ + int i; + + if (is_mock_bridge(dev)) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) + if (dev == &cxl_root_port[i]->dev) + return true; return false; } @@ -366,26 +379,6 @@ static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { }, }; -static struct platform_device *mock_cxl_root_port(struct pci_bus *bus, int index) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++) - if (bus == &mock_pci_bus[i]) - return cxl_root_port[index + i * NR_CXL_ROOT_PORTS]; - return NULL; -} - -static bool is_mock_port(struct platform_device *pdev) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) - if (pdev == cxl_root_port[i]) - return true; - return false; -} - static bool is_mock_bus(struct pci_bus *bus) { int i; @@ -405,16 +398,47 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle) return &mock_pci_root[host_bridge_index(adev)]; } +static int mock_cxl_port_enumerate_dports(struct device *host, + struct cxl_port *port) +{ + struct device *dev = &port->dev; + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { + struct platform_device *pdev = cxl_root_port[i]; + struct cxl_dport *dport; + + if (pdev->dev.parent != port->uport) + continue; + + cxl_device_lock(&port->dev); + dport = devm_cxl_add_dport(host, port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); + cxl_device_unlock(&port->dev); + + if (IS_ERR(dport)) { + dev_err(dev, "failed to add dport: %s (%ld)\n", + dev_name(&pdev->dev), PTR_ERR(dport)); + return PTR_ERR(dport); + } + + dev_dbg(dev, "add dport%d: %s\n", pdev->id, + dev_name(&pdev->dev)); + } + + return 0; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, .is_mock_bus = is_mock_bus, .is_mock_port = is_mock_port, .is_mock_dev = is_mock_dev, - .mock_port = mock_cxl_root_port, .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, + .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; @@ -598,3 +622,4 @@ module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); +MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 17408f892df4..56b4b7d734bc 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include "mock.h" static LIST_HEAD(mock); @@ -114,32 +116,6 @@ struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle) } EXPORT_SYMBOL_GPL(__wrap_acpi_pci_find_root); -void __wrap_pci_walk_bus(struct pci_bus *bus, - int (*cb)(struct pci_dev *, void *), void *userdata) -{ - int index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_bus(bus)) { - int rc, i; - - /* - * Simulate 2 root ports per host-bridge and no - * depth recursion. - */ - for (i = 0; i < 2; i++) { - rc = cb((struct pci_dev *) ops->mock_port(bus, i), - userdata); - if (rc) - break; - } - } else - pci_walk_bus(bus, cb, userdata); - - put_cxl_mock_ops(index); -} -EXPORT_SYMBOL_GPL(__wrap_pci_walk_bus); - struct nvdimm_bus * __wrap_nvdimm_bus_register(struct device *dev, struct nvdimm_bus_descriptor *nd_desc) @@ -155,5 +131,22 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); +int __wrap_devm_cxl_port_enumerate_dports(struct device *host, + struct cxl_port *port) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_port_enumerate_dports(host, port); + else + rc = devm_cxl_port_enumerate_dports(host, port); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); +MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 15ed0fd877e4..99e7ff38090d 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -2,6 +2,7 @@ #include #include +#include struct cxl_mock_ops { struct list_head list; @@ -15,10 +16,11 @@ struct cxl_mock_ops { struct acpi_object_list *arguments, unsigned long long *data); struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); - struct platform_device *(*mock_port)(struct pci_bus *bus, int index); bool (*is_mock_bus)(struct pci_bus *bus); - bool (*is_mock_port)(struct platform_device *pdev); + bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); + int (*devm_cxl_port_enumerate_dports)(struct device *host, + struct cxl_port *port); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); From d17d0540a0dbf109210f7b57a37571e2978da0fa Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 1 Feb 2022 12:24:30 -0800 Subject: [PATCH 22/48] cxl/core/hdm: Add CXL standard decoder enumeration to the core Unlike the decoder enumeration for "root decoders" described by platform firmware, standard decoders can be enumerated from the component registers space once the base address has been identified (via PCI, ACPI, or another mechanism). Add common infrastructure for HDM (Host-managed-Device-Memory) Decoder enumeration and share it between host-bridge, upstream switch port, and cxl_test defined decoders. The locking model for switch level decoders is to hold the port lock over the enumeration. This facilitates moving the dport and decoder enumeration to a 'port' driver. For now, the only enumerator of decoder resources is the cxl_acpi root driver. Co-developed-by: Ben Widawsky Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164374688404.395335.9239248252443123526.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 43 ++---- drivers/cxl/core/Makefile | 1 + drivers/cxl/core/core.h | 2 + drivers/cxl/core/hdm.c | 248 ++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c | 57 ++++++-- drivers/cxl/core/regs.c | 5 +- drivers/cxl/cxl.h | 33 ++++- drivers/cxl/cxlmem.h | 8 ++ tools/testing/cxl/Kbuild | 4 + tools/testing/cxl/test/cxl.c | 29 ++++ tools/testing/cxl/test/mock.c | 50 +++++++ tools/testing/cxl/test/mock.h | 3 + 12 files changed, 434 insertions(+), 49 deletions(-) create mode 100644 drivers/cxl/core/hdm.c diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 259441245687..8c2ced91518b 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -168,10 +168,10 @@ static int add_host_bridge_uport(struct device *match, void *arg) struct device *host = root_port->dev.parent; struct acpi_device *bridge = to_cxl_host_bridge(host, match); struct acpi_pci_root *pci_root; - int single_port_map[1], rc; - struct cxl_decoder *cxld; struct cxl_dport *dport; + struct cxl_hdm *cxlhdm; struct cxl_port *port; + int rc; if (!bridge) return 0; @@ -200,37 +200,24 @@ static int add_host_bridge_uport(struct device *match, void *arg) rc = devm_cxl_port_enumerate_dports(host, port); if (rc < 0) return rc; - if (rc > 1) - return 0; - - /* TODO: Scan CHBCR for HDM Decoder resources */ - - /* - * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability - * Structure) single ported host-bridges need not publish a decoder - * capability when a passthrough decode can be assumed, i.e. all - * transactions that the uport sees are claimed and passed to the single - * dport. Disable the range until the first CXL region is enumerated / - * activated. - */ - cxld = cxl_switch_decoder_alloc(port, 1); - if (IS_ERR(cxld)) - return PTR_ERR(cxld); - cxl_device_lock(&port->dev); - dport = list_first_entry(&port->dports, typeof(*dport), list); - cxl_device_unlock(&port->dev); + if (rc == 1) { + rc = devm_cxl_add_passthrough_decoder(host, port); + goto out; + } - single_port_map[0] = dport->port_id; + cxlhdm = devm_cxl_setup_hdm(host, port); + if (IS_ERR(cxlhdm)) { + rc = PTR_ERR(cxlhdm); + goto out; + } - rc = cxl_decoder_add(cxld, single_port_map); + rc = devm_cxl_enumerate_decoders(host, cxlhdm); if (rc) - put_device(&cxld->dev); - else - rc = cxl_decoder_autoremove(host, cxld); + dev_err(&port->dev, "Couldn't enumerate decoders (%d)\n", rc); - if (rc == 0) - dev_dbg(host, "add: %s\n", dev_name(&cxld->dev)); +out: + cxl_device_unlock(&port->dev); return rc; } diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 91057f0ec763..6d37cd78b151 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -8,3 +8,4 @@ cxl_core-y += regs.o cxl_core-y += memdev.o cxl_core-y += mbox.o cxl_core-y += pci.o +cxl_core-y += hdm.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index e0c9aacc4e9c..1a50c0fc399c 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -14,6 +14,8 @@ struct cxl_mem_query_commands; int cxl_query_cmd(struct cxl_memdev *cxlmd, struct cxl_mem_query_commands __user *q); int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s); +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length); int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c new file mode 100644 index 000000000000..84f4ed288a88 --- /dev/null +++ b/drivers/cxl/core/hdm.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "core.h" + +/** + * DOC: cxl core hdm + * + * Compute Express Link Host Managed Device Memory, starting with the + * CXL 2.0 specification, is managed by an array of HDM Decoder register + * instances per CXL port and per CXL endpoint. Define common helpers + * for enumerating these registers and capabilities. + */ + +static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, + int *target_map) +{ + int rc; + + rc = cxl_decoder_add_locked(cxld, target_map); + if (rc) { + put_device(&cxld->dev); + dev_err(&port->dev, "Failed to add decoder\n"); + return rc; + } + + rc = cxl_decoder_autoremove(&port->dev, cxld); + if (rc) + return rc; + + dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev)); + + return 0; +} + +/* + * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure) + * single ported host-bridges need not publish a decoder capability when a + * passthrough decode can be assumed, i.e. all transactions that the uport sees + * are claimed and passed to the single dport. Disable the range until the first + * CXL region is enumerated / activated. + */ +int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port) +{ + struct cxl_decoder *cxld; + struct cxl_dport *dport; + int single_port_map[1]; + + cxld = cxl_switch_decoder_alloc(port, 1); + if (IS_ERR(cxld)) + return PTR_ERR(cxld); + + device_lock_assert(&port->dev); + + dport = list_first_entry(&port->dports, typeof(*dport), list); + single_port_map[0] = dport->port_id; + + return add_hdm_decoder(port, cxld, single_port_map); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, CXL); + +static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) +{ + u32 hdm_cap; + + hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET); + cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap); + cxlhdm->target_count = + FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap)) + cxlhdm->interleave_mask |= GENMASK(11, 8); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap)) + cxlhdm->interleave_mask |= GENMASK(14, 12); +} + +static void __iomem *map_hdm_decoder_regs(struct cxl_port *port, + void __iomem *crb) +{ + struct cxl_component_reg_map map; + + cxl_probe_component_regs(&port->dev, crb, &map); + if (!map.hdm_decoder.valid) { + dev_err(&port->dev, "HDM decoder registers invalid\n"); + return IOMEM_ERR_PTR(-ENXIO); + } + + return crb + map.hdm_decoder.offset; +} + +/** + * devm_cxl_setup_hdm - map HDM decoder component registers + * @host: devm context for allocations + * @port: cxl_port to map + */ +struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port) +{ + struct device *dev = &port->dev; + void __iomem *crb, *hdm; + struct cxl_hdm *cxlhdm; + + cxlhdm = devm_kzalloc(host, sizeof(*cxlhdm), GFP_KERNEL); + if (!cxlhdm) + return ERR_PTR(-ENOMEM); + + cxlhdm->port = port; + crb = devm_cxl_iomap_block(host, port->component_reg_phys, + CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_err(dev, "No component registers mapped\n"); + return ERR_PTR(-ENXIO); + } + + hdm = map_hdm_decoder_regs(port, crb); + if (IS_ERR(hdm)) + return ERR_CAST(hdm); + cxlhdm->regs.hdm_decoder = hdm; + + parse_hdm_decoder_caps(cxlhdm); + if (cxlhdm->decoder_count == 0) { + dev_err(dev, "Spec violation. Caps invalid\n"); + return ERR_PTR(-ENXIO); + } + + return cxlhdm; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL); + +static int to_interleave_granularity(u32 ctrl) +{ + int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl); + + return 256 << val; +} + +static int to_interleave_ways(u32 ctrl) +{ + int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl); + + switch (val) { + case 0 ... 4: + return 1 << val; + case 8 ... 10: + return 3 << (val - 8); + default: + return 0; + } +} + +static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map, + void __iomem *hdm, int which) +{ + u64 size, base; + u32 ctrl; + int i; + union { + u64 value; + unsigned char target_id[8]; + } target_list; + + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); + base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); + size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); + + if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)) + size = 0; + + cxld->decoder_range = (struct range) { + .start = base, + .end = base + size - 1, + }; + + /* switch decoders are always enabled if committed */ + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) { + cxld->flags |= CXL_DECODER_F_ENABLE; + if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK) + cxld->flags |= CXL_DECODER_F_LOCK; + } + cxld->interleave_ways = to_interleave_ways(ctrl); + cxld->interleave_granularity = to_interleave_granularity(ctrl); + + if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl)) + cxld->target_type = CXL_DECODER_EXPANDER; + else + cxld->target_type = CXL_DECODER_ACCELERATOR; + + target_list.value = + ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); + for (i = 0; i < cxld->interleave_ways; i++) + target_map[i] = target_list.target_id[i]; +} + +/** + * devm_cxl_enumerate_decoders - add decoder objects per HDM register set + * @host: devm allocation context + * @cxlhdm: Structure to populate with HDM capabilities + */ +int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_port *port = cxlhdm->port; + int i, committed; + u32 ctrl; + + /* + * Since the register resource was recently claimed via request_region() + * be careful about trusting the "not-committed" status until the commit + * timeout has elapsed. The commit timeout is 10ms (CXL 2.0 + * 8.2.5.12.20), but double it to be tolerant of any clock skew between + * host and target. + */ + for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) { + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) + committed++; + } + + /* ensure that future checks of committed can be trusted */ + if (committed != cxlhdm->decoder_count) + msleep(20); + + for (i = 0; i < cxlhdm->decoder_count; i++) { + int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; + int rc, target_count = cxlhdm->target_count; + struct cxl_decoder *cxld; + + cxld = cxl_switch_decoder_alloc(port, target_count); + if (IS_ERR(cxld)) { + dev_warn(&port->dev, + "Failed to allocate the decoder\n"); + return PTR_ERR(cxld); + } + + init_hdm_decoder(cxld, target_map, cxlhdm->regs.hdm_decoder, i); + rc = add_hdm_decoder(port, cxld, target_map); + if (rc) { + dev_warn(&port->dev, + "Failed to add decoder to port\n"); + return rc; + } + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index fee9c7affef4..4dfb9df9e648 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -594,16 +594,15 @@ EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL); static int decoder_populate_targets(struct cxl_decoder *cxld, struct cxl_port *port, int *target_map) { - int rc = 0, i; + int i, rc = 0; if (!target_map) return 0; - cxl_device_lock(&port->dev); - if (list_empty(&port->dports)) { - rc = -EINVAL; - goto out_unlock; - } + device_lock_assert(&port->dev); + + if (list_empty(&port->dports)) + return -EINVAL; write_seqlock(&cxld->target_lock); for (i = 0; i < cxld->nr_targets; i++) { @@ -617,9 +616,6 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, } write_sequnlock(&cxld->target_lock); -out_unlock: - cxl_device_unlock(&port->dev); - return rc; } @@ -721,7 +717,7 @@ struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL); /** - * cxl_decoder_add - Add a decoder with targets + * cxl_decoder_add_locked - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl_decoder_alloc() * @target_map: A list of downstream ports that this decoder can direct memory * traffic to. These numbers should correspond with the port number @@ -731,12 +727,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL); * is an endpoint device. A more awkward example is a hostbridge whose root * ports get hot added (technically possible, though unlikely). * - * Context: Process context. Takes and releases the cxld's device lock. + * This is the locked variant of cxl_decoder_add(). + * + * Context: Process context. Expects the device lock of the port that owns the + * @cxld to be held. * * Return: Negative error code if the decoder wasn't properly configured; else * returns 0. */ -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) { struct cxl_port *port; struct device *dev; @@ -770,6 +769,40 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) return device_add(dev); } +EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL); + +/** + * cxl_decoder_add - Add a decoder with targets + * @cxld: The cxl decoder allocated by cxl_decoder_alloc() + * @target_map: A list of downstream ports that this decoder can direct memory + * traffic to. These numbers should correspond with the port number + * in the PCIe Link Capabilities structure. + * + * This is the unlocked variant of cxl_decoder_add_locked(). + * See cxl_decoder_add_locked(). + * + * Context: Process context. Takes and releases the device lock of the port that + * owns the @cxld. + */ +int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +{ + struct cxl_port *port; + int rc; + + if (WARN_ON_ONCE(!cxld)) + return -EINVAL; + + if (WARN_ON_ONCE(IS_ERR(cxld))) + return PTR_ERR(cxld); + + port = to_cxl_port(cxld->dev.parent); + + cxl_device_lock(&port->dev); + rc = cxl_decoder_add_locked(cxld, target_map); + cxl_device_unlock(&port->dev); + + return rc; +} EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL); static void cxld_unregister(void *dev) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 65d7f5880671..718b6b0ae4b3 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -159,9 +159,8 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base, } EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL); -static void __iomem *devm_cxl_iomap_block(struct device *dev, - resource_size_t addr, - resource_size_t length) +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length) { void __iomem *ret_val; struct resource *res; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 0754c68ccd33..c127d5c0ac96 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -17,6 +17,9 @@ * (port-driver, region-driver, nvdimm object-drivers... etc). */ +/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ +#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K + /* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/ #define CXL_CM_OFFSET 0x1000 #define CXL_CM_CAP_HDR_OFFSET 0x0 @@ -36,11 +39,23 @@ #define CXL_HDM_DECODER_CAP_OFFSET 0x0 #define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) #define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER0_BASE_LOW_OFFSET 0x10 -#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET 0x14 -#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET 0x18 -#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET 0x1c -#define CXL_HDM_DECODER0_CTRL_OFFSET 0x20 +#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) +#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) +#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 +#define CXL_HDM_DECODER_ENABLE BIT(1) +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) +#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) +#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) +#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) +#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) +#define CXL_HDM_DECODER0_CTRL_TYPE BIT(12) +#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) +#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) static inline int cxl_hdm_decoder_count(u32 cap_hdr) { @@ -162,7 +177,8 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, #define CXL_DECODER_F_TYPE2 BIT(2) #define CXL_DECODER_F_TYPE3 BIT(3) #define CXL_DECODER_F_LOCK BIT(4) -#define CXL_DECODER_F_MASK GENMASK(4, 0) +#define CXL_DECODER_F_ENABLE BIT(5) +#define CXL_DECODER_F_MASK GENMASK(5, 0) enum cxl_decoder_type { CXL_DECODER_ACCELERATOR = 2, @@ -306,7 +322,12 @@ struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); +struct cxl_hdm; +struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port); +int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm); +int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port); extern struct bus_type cxl_bus_type; diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 8d96d009ad90..fca2d1b5f6ff 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -264,4 +264,12 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); + +struct cxl_hdm { + struct cxl_component_regs regs; + unsigned int decoder_count; + unsigned int target_count; + unsigned int interleave_mask; + struct cxl_port *port; +}; #endif /* __CXL_MEM_H__ */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 61123544aa49..3045d7cba0db 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -5,6 +5,9 @@ ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register ldflags-y += --wrap=devm_cxl_port_enumerate_dports +ldflags-y += --wrap=devm_cxl_setup_hdm +ldflags-y += --wrap=devm_cxl_add_passthrough_decoder +ldflags-y += --wrap=devm_cxl_enumerate_decoders DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -31,6 +34,7 @@ cxl_core-y += $(CXL_CORE_SRC)/regs.o cxl_core-y += $(CXL_CORE_SRC)/memdev.o cxl_core-y += $(CXL_CORE_SRC)/mbox.o cxl_core-y += $(CXL_CORE_SRC)/pci.o +cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += config_check.o obj-m += test/ diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index ef002e909d38..81c09380c537 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "mock.h" #define NR_CXL_HOST_BRIDGES 4 @@ -398,6 +399,31 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle) return &mock_pci_root[host_bridge_index(adev)]; } +static struct cxl_hdm *mock_cxl_setup_hdm(struct device *host, + struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); + + if (!cxlhdm) + return ERR_PTR(-ENOMEM); + + cxlhdm->port = port; + return cxlhdm; +} + +static int mock_cxl_add_passthrough_decoder(struct device *host, + struct cxl_port *port) +{ + dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); + return -EOPNOTSUPP; +} + +static int mock_cxl_enumerate_decoders(struct device *host, + struct cxl_hdm *cxlhdm) +{ + return 0; +} + static int mock_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port) { @@ -439,6 +465,9 @@ static struct cxl_mock_ops cxl_mock_ops = { .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, + .devm_cxl_setup_hdm = mock_cxl_setup_hdm, + .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, + .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 56b4b7d734bc..18d3b65e2a9b 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -131,6 +131,56 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); +struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct device *host, + struct cxl_port *port) +{ + int index; + struct cxl_hdm *cxlhdm; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + cxlhdm = ops->devm_cxl_setup_hdm(host, port); + else + cxlhdm = devm_cxl_setup_hdm(host, port); + put_cxl_mock_ops(index); + + return cxlhdm; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); + +int __wrap_devm_cxl_add_passthrough_decoder(struct device *host, + struct cxl_port *port) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_add_passthrough_decoder(host, port); + else + rc = devm_cxl_add_passthrough_decoder(host, port); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL); + +int __wrap_devm_cxl_enumerate_decoders(struct device *host, + struct cxl_hdm *cxlhdm) +{ + int rc, index; + struct cxl_port *port = cxlhdm->port; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_enumerate_decoders(host, cxlhdm); + else + rc = devm_cxl_enumerate_decoders(host, cxlhdm); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL); + int __wrap_devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port) { diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 99e7ff38090d..15e48063ea4b 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -21,6 +21,9 @@ struct cxl_mock_ops { bool (*is_mock_dev)(struct device *dev); int (*devm_cxl_port_enumerate_dports)(struct device *host, struct cxl_port *port); + struct cxl_hdm *(*devm_cxl_setup_hdm)(struct device *host, struct cxl_port *port); + int (*devm_cxl_add_passthrough_decoder)(struct device *host, struct cxl_port *port); + int (*devm_cxl_enumerate_decoders)(struct device *host, struct cxl_hdm *hdm); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); From 83fbdbe4c18678eebe63fc49913f149a5afde057 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:30:41 -0800 Subject: [PATCH 23/48] cxl/core: Emit modalias for CXL devices In order to enable libkmod lookups for CXL device objects to their corresponding module, add 'modalias' to the base attribute of CXL devices. Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164298424120.3018233.15611905873808708542.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-cxl | 9 +++++++++ drivers/cxl/core/port.c | 26 ++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 0b6a2e6e8fbb..6d8cbf3355b5 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -34,6 +34,15 @@ Description: the same value communicated in the DEVTYPE environment variable for uevents for devices on the "cxl" bus. +What: /sys/bus/cxl/devices/*/modalias +Date: December, 2021 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + CXL device objects export the modalias attribute which mirrors + the same value communicated in the MODALIAS environment variable + for uevents for devices on the "cxl" bus. + What: /sys/bus/cxl/devices/portX/uport Date: June, 2021 KernelVersion: v5.14 diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 4dfb9df9e648..0bbd8fb8f35d 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -34,8 +34,25 @@ static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(devtype); +static int cxl_device_id(struct device *dev) +{ + if (dev->type == &cxl_nvdimm_bridge_type) + return CXL_DEVICE_NVDIMM_BRIDGE; + if (dev->type == &cxl_nvdimm_type) + return CXL_DEVICE_NVDIMM; + return 0; +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, CXL_MODALIAS_FMT "\n", cxl_device_id(dev)); +} +static DEVICE_ATTR_RO(modalias); + static struct attribute *cxl_base_attributes[] = { &dev_attr_devtype.attr, + &dev_attr_modalias.attr, NULL, }; @@ -855,15 +872,6 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv) } EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL); -static int cxl_device_id(struct device *dev) -{ - if (dev->type == &cxl_nvdimm_bridge_type) - return CXL_DEVICE_NVDIMM_BRIDGE; - if (dev->type == &cxl_nvdimm_type) - return CXL_DEVICE_NVDIMM; - return 0; -} - static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env) { return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT, From 54cdbf845cf719c09b45ae588cba469aabb3159c Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 1 Feb 2022 13:07:51 -0800 Subject: [PATCH 24/48] cxl/port: Add a driver for 'struct cxl_port' objects The need for a CXL port driver and a dedicated cxl_bus_type is driven by a need to simultaneously support 2 independent physical memory decode domains (cache coherent CXL.mem and uncached PCI.mmio) that also intersect at a single PCIe device node. A CXL Port is a device that advertises a CXL Component Register block with an "HDM Decoder Capability Structure". >From Documentation/driver-api/cxl/memory-devices.rst: Similar to how a RAID driver takes disk objects and assembles them into a new logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and assemble them into a CXL.mem decode topology. The need for runtime configuration of the CXL.mem topology is also similar to RAID in that different environments with the same hardware configuration may decide to assemble the topology in contrasting ways. One may choose performance (RAID0) striping memory across multiple Host Bridges and endpoints while another may opt for fault tolerance and disable any striping in the CXL.mem topology. The port driver identifies whether an endpoint Memory Expander is connected to a CXL topology. If an active (bound to the 'cxl_port' driver) CXL Port is not found at every PCIe Switch Upstream port and an active "root" CXL Port then the device is just a plain PCIe endpoint only capable of participating in PCI.mmio and DMA cycles, not CXL.mem coherent interleave sets. The 'cxl_port' driver lets the CXL subsystem leverage driver-core infrastructure for setup and teardown of register resources and communicating device activation status to userspace. The cxl_bus_type can rendezvous the async arrival of platform level CXL resources (via the 'cxl_acpi' driver) with the asynchronous enumeration of Memory Expander endpoints, while also implementing a hierarchical locking model independent of the associated 'struct pci_dev' locking model. The locking for dport and decoder enumeration is now handled in the core rather than callers. For now the port driver only enumerates and registers CXL resources (downstream port metadata and decoder resources) later it will be used to take action on its decoders in response to CXL.mem region provisioning requests. Note1: cxlpci.h has long depended on pci.h, but port.c was the first to not include pci.h. Carry that dependency in cxlpci.h. Note2: cxl port enumeration and probing complicates CXL subsystem init to the point that it helps to have centralized debug logging of probe events in cxl_bus_probe(). Reported-by: kernel test robot Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Co-developed-by: Dan Williams Link: https://lore.kernel.org/r/164374948116.464348.1772618057599155408.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- .../driver-api/cxl/memory-devices.rst | 302 ++++++++++++++++++ drivers/cxl/Kconfig | 5 + drivers/cxl/Makefile | 2 + drivers/cxl/acpi.c | 26 +- drivers/cxl/core/pci.c | 2 - drivers/cxl/core/port.c | 34 +- drivers/cxl/cxl.h | 4 + drivers/cxl/cxlpci.h | 1 + drivers/cxl/port.c | 63 ++++ tools/testing/cxl/Kbuild | 5 + tools/testing/cxl/test/cxl.c | 2 - 11 files changed, 415 insertions(+), 31 deletions(-) create mode 100644 drivers/cxl/port.c diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index c8f7a16cd0e3..3498d38d7cbd 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -14,6 +14,303 @@ that optionally define a device's contribution to an interleaved address range across multiple devices underneath a host-bridge or interleaved across host-bridges. +CXL Bus: Theory of Operation +============================ +Similar to how a RAID driver takes disk objects and assembles them into a new +logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and +assemble them into a CXL.mem decode topology. The need for runtime configuration +of the CXL.mem topology is also similar to RAID in that different environments +with the same hardware configuration may decide to assemble the topology in +contrasting ways. One may choose performance (RAID0) striping memory across +multiple Host Bridges and endpoints while another may opt for fault tolerance +and disable any striping in the CXL.mem topology. + +Platform firmware enumerates a menu of interleave options at the "CXL root port" +(Linux term for the top of the CXL decode topology). From there, PCIe topology +dictates which endpoints can participate in which Host Bridge decode regimes. +Each PCIe Switch in the path between the root and an endpoint introduces a point +at which the interleave can be split. For example platform firmware may say at a +given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn +interleave cycles across multiple Root Ports. An intervening Switch between a +port and an endpoint may interleave cycles across multiple Downstream Switch +Ports, etc. + +Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test' +module generates an emulated CXL topology of 2 Host Bridges each with 2 Root +Ports. Each of those Root Ports are connected to 2-way switches with endpoints +connected to those downstream ports for a total of 8 endpoints:: + + # cxl list -BEMPu -b cxl_test + { + "bus":"root3", + "provider":"cxl_test", + "ports:root3":[ + { + "port":"port5", + "host":"cxl_host_bridge.1", + "ports:port5":[ + { + "port":"port8", + "host":"cxl_switch_uport.1", + "endpoints:port8":[ + { + "endpoint":"endpoint9", + "host":"mem2", + "memdev":{ + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x1", + "numa_node":1, + "host":"cxl_mem.1" + } + }, + { + "endpoint":"endpoint15", + "host":"mem6", + "memdev":{ + "memdev":"mem6", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x5", + "numa_node":1, + "host":"cxl_mem.5" + } + } + ] + }, + { + "port":"port12", + "host":"cxl_switch_uport.3", + "endpoints:port12":[ + { + "endpoint":"endpoint17", + "host":"mem8", + "memdev":{ + "memdev":"mem8", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x7", + "numa_node":1, + "host":"cxl_mem.7" + } + }, + { + "endpoint":"endpoint13", + "host":"mem4", + "memdev":{ + "memdev":"mem4", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x3", + "numa_node":1, + "host":"cxl_mem.3" + } + } + ] + } + ] + }, + { + "port":"port4", + "host":"cxl_host_bridge.0", + "ports:port4":[ + { + "port":"port6", + "host":"cxl_switch_uport.0", + "endpoints:port6":[ + { + "endpoint":"endpoint7", + "host":"mem1", + "memdev":{ + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + } + }, + { + "endpoint":"endpoint14", + "host":"mem5", + "memdev":{ + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + } + } + ] + }, + { + "port":"port10", + "host":"cxl_switch_uport.2", + "endpoints:port10":[ + { + "endpoint":"endpoint16", + "host":"mem7", + "memdev":{ + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + } + }, + { + "endpoint":"endpoint11", + "host":"mem3", + "memdev":{ + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + } + ] + } + ] + } + ] + } + +In that listing each "root", "port", and "endpoint" object correspond a kernel +'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to +its descendants. So "root" claims non-PCIe enumerable platform decode ranges and +decodes them to "ports", "ports" decode to "endpoints", and "endpoints" +represent the decode from SPA (System Physical Address) to DPA (Device Physical +Address). + +Continuing the RAID analogy, disks have both topology metadata and on device +metadata that determine RAID set assembly. CXL Port topology and CXL Port link +status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated +by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches +the cxl_pci driver to a CXL Memory Expander there is no role for CXL Port +objects. Conversely for hot-unplug / removal scenarios, there is no need for +the Linux PCI core to tear down switch-level CXL resources because the endpoint +->remove() event cleans up the port data that was established to support that +Memory Expander. + +The port metadata and potential decode schemes that a give memory device may +participate can be determined via a command like:: + + # cxl list -BDMu -d root -m mem3 + { + "bus":"root3", + "provider":"cxl_test", + "decoders:root3":[ + { + "decoder":"decoder3.1", + "resource":"0x8030000000", + "size":"512.00 MiB (536.87 MB)", + "volatile_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.3", + "resource":"0x8060000000", + "size":"512.00 MiB (536.87 MB)", + "pmem_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.0", + "resource":"0x8020000000", + "size":"256.00 MiB (268.44 MB)", + "volatile_capable":true, + "nr_targets":1 + }, + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ], + "memdevs:root3":[ + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + } + +...which queries the CXL topology to ask "given CXL Memory Expander with a kernel +device name of 'mem3' which platform level decode ranges may this device +participate". A given expander can participate in multiple CXL.mem interleave +sets simultaneously depending on how many decoder resource it has. In this +example mem3 can participate in one or more of a PMEM interleave that spans to +Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile +memory interleave that spans 2 Host Bridges, and a Volatile memory interleave +that only targets a single Host Bridge. + +Conversely the memory devices that can participate in a given platform level +decode scheme can be determined via a command like the following:: + + # cxl list -MDu -d 3.2 + [ + { + "memdevs":[ + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + }, + { + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + }, + { + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ] + } + ] + +...where the naming scheme for decoders is "decoder.". + Driver Infrastructure ===================== @@ -28,6 +325,11 @@ CXL Memory Device .. kernel-doc:: drivers/cxl/pci.c :internal: +CXL Port +-------- +.. kernel-doc:: drivers/cxl/port.c + :doc: cxl port + CXL Core -------- .. kernel-doc:: drivers/cxl/cxl.h diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index ef05e96f8f97..4f4f7587f6ca 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -77,4 +77,9 @@ config CXL_PMEM provisioning the persistent memory capacity of CXL memory expanders. If unsure say 'm'. + +config CXL_PORT + default CXL_BUS + tristate + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index cf07ae6cea17..56fcac2323cb 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -3,7 +3,9 @@ obj-$(CONFIG_CXL_BUS) += core/ obj-$(CONFIG_CXL_PCI) += cxl_pci.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o +obj-$(CONFIG_CXL_PORT) += cxl_port.o cxl_pci-y := pci.o cxl_acpi-y := acpi.o cxl_pmem-y := pmem.o +cxl_port-y := port.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 8c2ced91518b..82591642ea90 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -169,7 +169,6 @@ static int add_host_bridge_uport(struct device *match, void *arg) struct acpi_device *bridge = to_cxl_host_bridge(host, match); struct acpi_pci_root *pci_root; struct cxl_dport *dport; - struct cxl_hdm *cxlhdm; struct cxl_port *port; int rc; @@ -197,28 +196,7 @@ static int add_host_bridge_uport(struct device *match, void *arg) return PTR_ERR(port); dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); - rc = devm_cxl_port_enumerate_dports(host, port); - if (rc < 0) - return rc; - cxl_device_lock(&port->dev); - if (rc == 1) { - rc = devm_cxl_add_passthrough_decoder(host, port); - goto out; - } - - cxlhdm = devm_cxl_setup_hdm(host, port); - if (IS_ERR(cxlhdm)) { - rc = PTR_ERR(cxlhdm); - goto out; - } - - rc = devm_cxl_enumerate_decoders(host, cxlhdm); - if (rc) - dev_err(&port->dev, "Couldn't enumerate decoders (%d)\n", rc); - -out: - cxl_device_unlock(&port->dev); - return rc; + return 0; } struct cxl_chbs_context { @@ -278,9 +256,7 @@ static int add_host_bridge_dport(struct device *match, void *arg) return 0; } - cxl_device_lock(&root_port->dev); dport = devm_cxl_add_dport(host, root_port, match, uid, ctx.chbcr); - cxl_device_unlock(&root_port->dev); if (IS_ERR(dport)) { dev_err(host, "failed to add downstream port: %s\n", dev_name(match)); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c5a9e03ed477..8ec5f74da679 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -47,10 +47,8 @@ static int match_add_dports(struct pci_dev *pdev, void *data) dev_dbg(&port->dev, "failed to find component registers\n"); port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - cxl_device_lock(&port->dev); dport = devm_cxl_add_dport(ctx->host, port, &pdev->dev, port_num, cxl_regmap_to_base(pdev, &map)); - cxl_device_unlock(&port->dev); if (IS_ERR(dport)) { ctx->error = PTR_ERR(dport); return PTR_ERR(dport); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0bbd8fb8f35d..a66284b7eb1b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -40,6 +40,11 @@ static int cxl_device_id(struct device *dev) return CXL_DEVICE_NVDIMM_BRIDGE; if (dev->type == &cxl_nvdimm_type) return CXL_DEVICE_NVDIMM; + if (is_cxl_port(dev)) { + if (is_cxl_root(to_cxl_port(dev))) + return CXL_DEVICE_ROOT; + return CXL_DEVICE_PORT; + } return 0; } @@ -298,6 +303,9 @@ static void unregister_port(void *_port) { struct cxl_port *port = _port; + if (!is_cxl_root(port)) + device_lock_assert(port->dev.parent); + device_unregister(&port->dev); } @@ -526,15 +534,34 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) return dup ? -EEXIST : 0; } +/* + * Since root-level CXL dports cannot be enumerated by PCI they are not + * enumerated by the common port driver that acquires the port lock over + * dport add/remove. Instead, root dports are manually added by a + * platform driver and cond_cxl_root_lock() is used to take the missing + * port lock in that case. + */ +static void cond_cxl_root_lock(struct cxl_port *port) +{ + if (is_cxl_root(port)) + cxl_device_lock(&port->dev); +} + +static void cond_cxl_root_unlock(struct cxl_port *port) +{ + if (is_cxl_root(port)) + cxl_device_unlock(&port->dev); +} + static void cxl_dport_remove(void *data) { struct cxl_dport *dport = data; struct cxl_port *port = dport->port; put_device(dport->dport); - cxl_device_lock(&port->dev); + cond_cxl_root_lock(port); list_del(&dport->list); - cxl_device_unlock(&port->dev); + cond_cxl_root_unlock(port); } static void cxl_dport_unlink(void *data) @@ -587,7 +614,9 @@ struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port, dport->component_reg_phys = component_reg_phys; dport->port = port; + cond_cxl_root_lock(port); rc = add_dport(port, dport); + cond_cxl_root_unlock(port); if (rc) return ERR_PTR(rc); @@ -895,6 +924,7 @@ static int cxl_bus_probe(struct device *dev) rc = to_cxl_drv(dev->driver)->probe(dev); cxl_nested_unlock(dev); + dev_dbg(dev, "probe: %d\n", rc); return rc; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index c127d5c0ac96..2b24eb56618f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -163,6 +163,8 @@ int cxl_map_device_regs(struct pci_dev *pdev, enum cxl_regloc_type; int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 @@ -354,6 +356,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); #define CXL_DEVICE_NVDIMM_BRIDGE 1 #define CXL_DEVICE_NVDIMM 2 +#define CXL_DEVICE_PORT 3 +#define CXL_DEVICE_ROOT 4 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 103636fda198..47640f19e899 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -2,6 +2,7 @@ /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ #ifndef __CXL_PCI_H__ #define __CXL_PCI_H__ +#include #include "cxl.h" #define CXL_MEMORY_PROGIF 0x10 diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c new file mode 100644 index 000000000000..daa4c3c33aed --- /dev/null +++ b/drivers/cxl/port.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "cxlpci.h" + +/** + * DOC: cxl port + * + * The port driver enumerates dport via PCI and scans for HDM + * (Host-managed-Device-Memory) decoder resources via the + * @component_reg_phys value passed in by the agent that registered the + * port. All descendant ports of a CXL root port (described by platform + * firmware) are managed in this drivers context. Each driver instance + * is responsible for tearing down the driver context of immediate + * descendant ports. The locking for this is validated by + * CONFIG_PROVE_CXL_LOCKING. + * + * The primary service this driver provides is presenting APIs to other + * drivers to utilize the decoders, and indicating to userspace (via bind + * status) the connectivity of the CXL.mem protocol throughout the + * PCIe topology. + */ + +static int cxl_port_probe(struct device *dev) +{ + struct cxl_port *port = to_cxl_port(dev); + struct cxl_hdm *cxlhdm; + int rc; + + rc = devm_cxl_port_enumerate_dports(dev, port); + if (rc < 0) + return rc; + + if (rc == 1) + return devm_cxl_add_passthrough_decoder(dev, port); + + cxlhdm = devm_cxl_setup_hdm(dev, port); + if (IS_ERR(cxlhdm)) + return PTR_ERR(cxlhdm); + + rc = devm_cxl_enumerate_decoders(dev, cxlhdm); + if (rc) { + dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); + return rc; + } + + return 0; +} + +static struct cxl_driver cxl_port_driver = { + .name = "cxl_port", + .probe = cxl_port_probe, + .id = CXL_DEVICE_PORT, +}; + +module_cxl_driver(cxl_port_driver); +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); +MODULE_ALIAS_CXL(CXL_DEVICE_PORT); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 3045d7cba0db..27ae13e23e79 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -26,6 +26,11 @@ obj-m += cxl_pmem.o cxl_pmem-y := $(CXL_SRC)/pmem.o cxl_pmem-y += config_check.o +obj-m += cxl_port.o + +cxl_port-y := $(CXL_SRC)/port.o +cxl_port-y += config_check.o + obj-m += cxl_core.o cxl_core-y := $(CXL_CORE_SRC)/port.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 81c09380c537..ce6ace286fc7 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -437,10 +437,8 @@ static int mock_cxl_port_enumerate_dports(struct device *host, if (pdev->dev.parent != port->uport) continue; - cxl_device_lock(&port->dev); dport = devm_cxl_add_dport(host, port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); - cxl_device_unlock(&port->dev); if (IS_ERR(dport)) { dev_err(dev, "failed to add dport: %s (%ld)\n", From 664bf115833c2d4ee717ab63f4e6e72a25c66e77 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 1 Feb 2022 13:23:14 -0800 Subject: [PATCH 25/48] cxl/core/port: Remove @host argument for dport + decoder enumeration Now that dport and decoder enumeration is centralized in the port driver, the @host argument for these helpers can be made implicit. For the root port the host is the port's uport device (ACPI0017 for cxl_acpi), and for all other descendant ports the devm context is the parent of @port. Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164375043390.484143.17617734732003230076.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 2 +- drivers/cxl/core/hdm.c | 12 +++++------- drivers/cxl/core/pci.c | 7 ++----- drivers/cxl/core/port.c | 9 +++++++-- drivers/cxl/cxl.h | 8 ++++---- drivers/cxl/cxlpci.h | 2 +- drivers/cxl/port.c | 8 ++++---- tools/testing/cxl/test/cxl.c | 14 +++++--------- tools/testing/cxl/test/mock.c | 28 ++++++++++++---------------- tools/testing/cxl/test/mock.h | 9 ++++----- 10 files changed, 45 insertions(+), 54 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 82591642ea90..683f2ca32c97 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -256,7 +256,7 @@ static int add_host_bridge_dport(struct device *match, void *arg) return 0; } - dport = devm_cxl_add_dport(host, root_port, match, uid, ctx.chbcr); + dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr); if (IS_ERR(dport)) { dev_err(host, "failed to add downstream port: %s\n", dev_name(match)); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 84f4ed288a88..80280db316c0 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -44,7 +44,7 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, * are claimed and passed to the single dport. Disable the range until the first * CXL region is enumerated / activated. */ -int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port) +int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { struct cxl_decoder *cxld; struct cxl_dport *dport; @@ -93,21 +93,20 @@ static void __iomem *map_hdm_decoder_regs(struct cxl_port *port, /** * devm_cxl_setup_hdm - map HDM decoder component registers - * @host: devm context for allocations * @port: cxl_port to map */ -struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port) +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port) { struct device *dev = &port->dev; void __iomem *crb, *hdm; struct cxl_hdm *cxlhdm; - cxlhdm = devm_kzalloc(host, sizeof(*cxlhdm), GFP_KERNEL); + cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); if (!cxlhdm) return ERR_PTR(-ENOMEM); cxlhdm->port = port; - crb = devm_cxl_iomap_block(host, port->component_reg_phys, + crb = devm_cxl_iomap_block(dev, port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); if (!crb) { dev_err(dev, "No component registers mapped\n"); @@ -195,10 +194,9 @@ static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map, /** * devm_cxl_enumerate_decoders - add decoder objects per HDM register set - * @host: devm allocation context * @cxlhdm: Structure to populate with HDM capabilities */ -int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm) +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; struct cxl_port *port = cxlhdm->port; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 8ec5f74da679..c9a494d6976a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -15,7 +15,6 @@ struct cxl_walk_context { struct pci_bus *bus; - struct device *host; struct cxl_port *port; int type; int error; @@ -47,7 +46,7 @@ static int match_add_dports(struct pci_dev *pdev, void *data) dev_dbg(&port->dev, "failed to find component registers\n"); port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - dport = devm_cxl_add_dport(ctx->host, port, &pdev->dev, port_num, + dport = devm_cxl_add_dport(port, &pdev->dev, port_num, cxl_regmap_to_base(pdev, &map)); if (IS_ERR(dport)) { ctx->error = PTR_ERR(dport); @@ -62,13 +61,12 @@ static int match_add_dports(struct pci_dev *pdev, void *data) /** * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port - * @host: devm context * @port: cxl_port whose ->uport is the upstream of dports to be enumerated * * Returns a positive number of dports enumerated or a negative error * code. */ -int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port) +int devm_cxl_port_enumerate_dports(struct cxl_port *port) { struct pci_bus *bus = cxl_port_to_pci_bus(port); struct cxl_walk_context ctx; @@ -83,7 +81,6 @@ int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port) type = PCI_EXP_TYPE_DOWNSTREAM; ctx = (struct cxl_walk_context) { - .host = host, .port = port, .bus = bus, .type = type, diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index a66284b7eb1b..62b9f5dc64b5 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -576,7 +576,6 @@ static void cxl_dport_unlink(void *data) /** * devm_cxl_add_dport - append downstream port data to a cxl_port - * @host: devm context for allocations * @port: the cxl_port that references this dport * @dport_dev: firmware or PCI device representing the dport * @port_id: identifier for this dport in a decoder's target list @@ -586,14 +585,20 @@ static void cxl_dport_unlink(void *data) * either the port's host (for root ports), or the port itself (for * switch ports) */ -struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port, +struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t component_reg_phys) { char link_name[CXL_TARGET_STRLEN]; struct cxl_dport *dport; + struct device *host; int rc; + if (is_cxl_root(port)) + host = port->uport; + else + host = &port->dev; + if (!host->driver) { dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n", dev_name(dport_dev)); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 2b24eb56618f..89fbf49ebf98 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -313,7 +313,7 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port); struct cxl_port *find_cxl_root(struct device *dev); -struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port, +struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, resource_size_t component_reg_phys); struct cxl_decoder *to_cxl_decoder(struct device *dev); @@ -327,9 +327,9 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); struct cxl_hdm; -struct cxl_hdm *devm_cxl_setup_hdm(struct device *host, struct cxl_port *port); -int devm_cxl_enumerate_decoders(struct device *host, struct cxl_hdm *cxlhdm); -int devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port); +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port); +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm); +int devm_cxl_add_passthrough_decoder(struct cxl_port *port); extern struct bus_type cxl_bus_type; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 47640f19e899..766de340c4ce 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -58,5 +58,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, return pci_resource_start(pdev, map->barno) + map->block_offset; } -int devm_cxl_port_enumerate_dports(struct device *host, struct cxl_port *port); +int devm_cxl_port_enumerate_dports(struct cxl_port *port); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index daa4c3c33aed..5a1aec28dc46 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -31,18 +31,18 @@ static int cxl_port_probe(struct device *dev) struct cxl_hdm *cxlhdm; int rc; - rc = devm_cxl_port_enumerate_dports(dev, port); + rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) return rc; if (rc == 1) - return devm_cxl_add_passthrough_decoder(dev, port); + return devm_cxl_add_passthrough_decoder(port); - cxlhdm = devm_cxl_setup_hdm(dev, port); + cxlhdm = devm_cxl_setup_hdm(port); if (IS_ERR(cxlhdm)) return PTR_ERR(cxlhdm); - rc = devm_cxl_enumerate_decoders(dev, cxlhdm); + rc = devm_cxl_enumerate_decoders(cxlhdm); if (rc) { dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); return rc; diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index ce6ace286fc7..40ed567952e6 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -399,8 +399,7 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle) return &mock_pci_root[host_bridge_index(adev)]; } -static struct cxl_hdm *mock_cxl_setup_hdm(struct device *host, - struct cxl_port *port) +static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port) { struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); @@ -411,21 +410,18 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct device *host, return cxlhdm; } -static int mock_cxl_add_passthrough_decoder(struct device *host, - struct cxl_port *port) +static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) { dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); return -EOPNOTSUPP; } -static int mock_cxl_enumerate_decoders(struct device *host, - struct cxl_hdm *cxlhdm) +static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) { return 0; } -static int mock_cxl_port_enumerate_dports(struct device *host, - struct cxl_port *port) +static int mock_cxl_port_enumerate_dports(struct cxl_port *port) { struct device *dev = &port->dev; int i; @@ -437,7 +433,7 @@ static int mock_cxl_port_enumerate_dports(struct device *host, if (pdev->dev.parent != port->uport) continue; - dport = devm_cxl_add_dport(host, port, &pdev->dev, pdev->id, + dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); if (IS_ERR(dport)) { diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 18d3b65e2a9b..6e8c9d63c92d 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -131,66 +131,62 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); -struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct device *host, - struct cxl_port *port) +struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port) { int index; struct cxl_hdm *cxlhdm; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport)) - cxlhdm = ops->devm_cxl_setup_hdm(host, port); + cxlhdm = ops->devm_cxl_setup_hdm(port); else - cxlhdm = devm_cxl_setup_hdm(host, port); + cxlhdm = devm_cxl_setup_hdm(port); put_cxl_mock_ops(index); return cxlhdm; } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); -int __wrap_devm_cxl_add_passthrough_decoder(struct device *host, - struct cxl_port *port) +int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport)) - rc = ops->devm_cxl_add_passthrough_decoder(host, port); + rc = ops->devm_cxl_add_passthrough_decoder(port); else - rc = devm_cxl_add_passthrough_decoder(host, port); + rc = devm_cxl_add_passthrough_decoder(port); put_cxl_mock_ops(index); return rc; } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL); -int __wrap_devm_cxl_enumerate_decoders(struct device *host, - struct cxl_hdm *cxlhdm) +int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) { int rc, index; struct cxl_port *port = cxlhdm->port; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport)) - rc = ops->devm_cxl_enumerate_decoders(host, cxlhdm); + rc = ops->devm_cxl_enumerate_decoders(cxlhdm); else - rc = devm_cxl_enumerate_decoders(host, cxlhdm); + rc = devm_cxl_enumerate_decoders(cxlhdm); put_cxl_mock_ops(index); return rc; } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL); -int __wrap_devm_cxl_port_enumerate_dports(struct device *host, - struct cxl_port *port) +int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport)) - rc = ops->devm_cxl_port_enumerate_dports(host, port); + rc = ops->devm_cxl_port_enumerate_dports(port); else - rc = devm_cxl_port_enumerate_dports(host, port); + rc = devm_cxl_port_enumerate_dports(port); put_cxl_mock_ops(index); return rc; diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 15e48063ea4b..738f24e3988a 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -19,11 +19,10 @@ struct cxl_mock_ops { bool (*is_mock_bus)(struct pci_bus *bus); bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); - int (*devm_cxl_port_enumerate_dports)(struct device *host, - struct cxl_port *port); - struct cxl_hdm *(*devm_cxl_setup_hdm)(struct device *host, struct cxl_port *port); - int (*devm_cxl_add_passthrough_decoder)(struct device *host, struct cxl_port *port); - int (*devm_cxl_enumerate_decoders)(struct device *host, struct cxl_hdm *hdm); + int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); + struct cxl_hdm *(*devm_cxl_setup_hdm)(struct cxl_port *port); + int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); + int (*devm_cxl_enumerate_decoders)(struct cxl_hdm *hdm); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); From 4112a08dd3c5ea0a96029f14061f2320826cfd32 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 1 Feb 2022 13:28:53 -0800 Subject: [PATCH 26/48] cxl/pci: Store component register base in cxlds In preparation for defining a cxl_port object to represent the decoder resources of a memory expander capture the component register base address. The port driver uses the component register base to enumerate the HDM Decoder Capability structure. Unlike other cxl_port objects the endpoint port decodes from upstream SPA to downstream DPA rather than upstream port to downstream port. Signed-off-by: Ben Widawsky Reported-by: kernel test robot Reviewed-by: Jonathan Cameron [djbw: clarify changelog] Link: https://lore.kernel.org/r/164375084181.484304.3919737667590006795.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxlmem.h | 3 +++ drivers/cxl/pci.c | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index fca2d1b5f6ff..90d67fff5bed 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -116,6 +116,7 @@ struct cxl_mbox_cmd { * @active_persistent_bytes: sum of hard + soft persistent * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset + * @component_reg_phys: register base of component registers * @mbox_send: @dev specific transport for transmitting mailbox commands * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for @@ -145,6 +146,8 @@ struct cxl_dev_state { u64 next_volatile_bytes; u64 next_persistent_bytes; + resource_size_t component_reg_phys; + int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); }; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8b435b875b65..bf14c365ea33 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -416,6 +416,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + /* + * If the component registers can't be found, the cxl_pci driver may + * still be useful for management functions so don't return an error. + */ + cxlds->component_reg_phys = CXL_RESOURCE_NONE; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + dev_warn(&pdev->dev, "No component registers (%d)\n", rc); + + cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map); + rc = cxl_pci_setup_mailbox(cxlds); if (rc) return rc; From 06e279e5ebe4f32ffe544ec96a199870319a7315 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 1 Feb 2022 14:06:32 -0800 Subject: [PATCH 27/48] cxl/pci: Cache device DVSEC offset The PCIe device DVSEC, defined in the CXL 2.0 spec, 8.1.3 is required to be implemented by CXL 2.0 endpoint devices. In preparation for consuming this information in a new cxl_mem driver, retrieve the CXL DVSEC position and warn about the implications of not finding it. Allow for mailbox operation even if the CXL DVSEC is missing. Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164375309615.513620.7874131241128599893.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxlmem.h | 2 ++ drivers/cxl/pci.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 90d67fff5bed..5cf5329e13a9 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -98,6 +98,7 @@ struct cxl_mbox_cmd { * * @dev: The device associated with this CXL state * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC * @payload_size: Size of space for payload * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) * @lsa_size: Size of Label Storage Area @@ -126,6 +127,7 @@ struct cxl_dev_state { struct device *dev; struct cxl_regs regs; + int cxl_dvsec; size_t payload_size; size_t lsa_size; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index bf14c365ea33..c94002166084 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -408,6 +408,12 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->cxl_dvsec = pci_find_dvsec_capability( + pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + if (!cxlds->cxl_dvsec) + dev_warn(&pdev->dev, + "Device DVSEC not present, skip CXL.mem init\n"); + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; From 560f78559006a4bab20455ae7eca33d8417c38fc Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 1 Feb 2022 15:48:56 -0800 Subject: [PATCH 28/48] cxl/pci: Retrieve CXL DVSEC memory info Before CXL 2.0 HDM Decoder Capability mechanisms can be utilized in a device the driver must determine that the device is ready for CXL.mem operation and that platform firmware, or some other agent, has established an active decode via the legacy CXL 1.1 decoder mechanism. This legacy mechanism is defined in the CXL DVSEC as a set of range registers and status bits that take time to settle after a reset. Validate the CXL memory decode setup via the DVSEC and cache it for later consideration by the cxl_mem driver (to be added). Failure to validate is not fatal to the cxl_pci driver since that is only providing CXL command support over PCI.mmio, and might be needed to rectify CXL DVSEC validation problems. Any potential ranges that the device is already claiming via DVSEC need to be reconciled with the dynamic provisioning ranges provided by platform firmware (like ACPI CEDT.CFMWS). Leave that reconciliation to the cxl_mem driver. [djbw: shorten defines] [djbw: change precise spin wait to generous msleep] Reported-by: kernel test robot Signed-off-by: Ben Widawsky [djbw: clarify changelog] Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164375911821.559935.7375160041663453400.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxlmem.h | 14 +++++ drivers/cxl/cxlpci.h | 13 +++++ drivers/cxl/pci.c | 119 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5cf5329e13a9..00f55f4066b9 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -89,6 +89,18 @@ struct cxl_mbox_cmd { */ #define CXL_CAPACITY_MULTIPLIER SZ_256M +/** + * struct cxl_endpoint_dvsec_info - Cached DVSEC info + * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE + * @ranges: Number of active HDM ranges this device uses. + * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE + */ +struct cxl_endpoint_dvsec_info { + bool mem_enabled; + int ranges; + struct range dvsec_range[2]; +}; + /** * struct cxl_dev_state - The driver device state * @@ -118,6 +130,7 @@ struct cxl_mbox_cmd { * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset * @component_reg_phys: register base of component registers + * @info: Cached DVSEC information about the device. * @mbox_send: @dev specific transport for transmitting mailbox commands * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for @@ -149,6 +162,7 @@ struct cxl_dev_state { u64 next_persistent_bytes; resource_size_t component_reg_phys; + struct cxl_endpoint_dvsec_info info; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); }; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 766de340c4ce..329e7ea3f36a 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -17,6 +17,19 @@ /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_MEM_CAPABLE BIT(2) +#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE BIT(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID BIT(0) +#define CXL_DVSEC_MEM_ACTIVE BIT(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) /* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ #define CXL_DVSEC_FUNCTION_MAP 2 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index c94002166084..6b3270246545 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -386,6 +386,120 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } +static int wait_for_valid(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; + + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + return -ETIMEDOUT; +} + +static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + int hdm_count, rc, i; + u16 cap, ctrl; + + if (!d) + return -ENXIO; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) + return -ENXIO; + + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. + */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; + + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info->dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + info->ranges++; + } + + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -449,6 +563,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = cxl_dvsec_ranges(cxlds); + if (rc) + dev_warn(&pdev->dev, + "Failed to get DVSEC range information (%d)\n", rc); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); From 523e594d9cc03db962c741ce02c8a58aab58a123 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Sun, 23 Jan 2022 16:31:13 -0800 Subject: [PATCH 29/48] cxl/pci: Implement wait for media active CXL 2.0 8.1.3.8.2 states: Memory_Active: When set, indicates that the CXL Range 1 memory is fully initialized and available for software use. Must be set within Range 1. Memory_Active_Timeout of deassertion of reset to CXL device if CXL.mem HwInit Mode=1 Unfortunately, Memory_Active can take quite a long time depending on media size (up to 256s per 2.0 spec). Provide a callback for the eventual establishment of CXL.mem operations via the 'cxl_mem' driver the 'struct cxl_memdev'. The implementation waits for 60s by default for now and can be overridden by the mbox_ready_time module parameter. Signed-off-by: Ben Widawsky [djbw: switch to sleeping wait] Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298427373.3018233.9309741847039301834.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/cxlmem.h | 2 ++ drivers/cxl/pci.c | 49 +++++++++++++++++++++++++++++++++++- tools/testing/cxl/test/mem.c | 8 ++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 00f55f4066b9..e70838e5dc17 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -132,6 +132,7 @@ struct cxl_endpoint_dvsec_info { * @component_reg_phys: register base of component registers * @info: Cached DVSEC information about the device. * @mbox_send: @dev specific transport for transmitting mailbox commands + * @wait_media_ready: @dev specific method to await media ready * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -165,6 +166,7 @@ struct cxl_dev_state { struct cxl_endpoint_dvsec_info info; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); + int (*wait_media_ready)(struct cxl_dev_state *cxlds); }; enum cxl_opcode { diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 6b3270246545..c01bd44d11c4 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -49,7 +49,7 @@ static unsigned short mbox_ready_timeout = 60; module_param(mbox_ready_timeout, ushort, 0644); MODULE_PARM_DESC(mbox_ready_timeout, - "seconds to wait for mailbox ready status"); + "seconds to wait for mailbox ready / memory active status"); static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { @@ -418,6 +418,51 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) return -ETIMEDOUT; } +/* + * Wait up to @mbox_ready_timeout for the device to report memory + * active. + */ +static int wait_for_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; + + for (i = mbox_ready_timeout; i; i--) { + u32 temp; + int rc; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; + + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } + + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + mbox_ready_timeout); + return -ETIMEDOUT; + } + + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; +} + static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) { struct cxl_endpoint_dvsec_info *info = &cxlds->info; @@ -528,6 +573,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); + cxlds->wait_media_ready = wait_for_media_ready; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8c2086c4caef..3af3f94de0c3 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +237,12 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * return rc; } +static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds) +{ + msleep(100); + return 0; +} + static void label_area_release(void *lsa) { vfree(lsa); @@ -262,6 +269,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) return PTR_ERR(cxlds); cxlds->mbox_send = cxl_mock_mbox_send; + cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; rc = cxl_enumerate_cmds(cxlds); From bcc79ea34398845d814170ddc06a457b35ae1975 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 31 Jan 2022 13:56:11 -0800 Subject: [PATCH 30/48] cxl/pci: Emit device serial number Per the CXL specification (8.1.12.2 Memory Device PCIe Capabilities and Extended Capabilities) the Device Serial Number capability is mandatory. Emit it for user tooling to identify devices. It is reasonable to ask whether the attribute should be added to the list of PCI sysfs device attributes. The PCI layer can optionally emit it too, but the CXL subsystem is aiming to preserve its independence and the possibility of CXL topologies with non-PCI devices in it. To date that has only proven useful for the 'cxl_test' model, but as can be seen with seen with ACPI0016 devices, sometimes all that is needed is a platform firmware table to point to CXL Component Registers in MMIO space to define a "CXL" device. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164366608838.196598.16856227191534267098.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-cxl | 9 +++++++++ drivers/cxl/core/memdev.c | 11 +++++++++++ drivers/cxl/cxlmem.h | 2 ++ drivers/cxl/pci.c | 1 + tools/testing/cxl/test/mem.c | 1 + 5 files changed, 24 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 6d8cbf3355b5..87c0e5e65322 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -25,6 +25,15 @@ Description: identically named field in the Identify Memory Device Output Payload in the CXL-2.0 specification. +What: /sys/bus/cxl/devices/memX/serial +Date: January, 2022 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) 64-bit serial number per the PCIe Device Serial Number + capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2 + Memory Device PCIe Capabilities and Extended Capabilities. + What: /sys/bus/cxl/devices/*/devtype Date: June, 2021 KernelVersion: v5.14 diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 61029cb7ac62..1e574b052583 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -89,7 +89,18 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, static struct device_attribute dev_attr_pmem_size = __ATTR(size, 0444, pmem_size_show, NULL); +static ssize_t serial_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + return sysfs_emit(buf, "%#llx\n", cxlds->serial); +} +static DEVICE_ATTR_RO(serial); + static struct attribute *cxl_memdev_attributes[] = { + &dev_attr_serial.attr, &dev_attr_firmware_version.attr, &dev_attr_payload_max.attr, &dev_attr_label_storage_size.attr, diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index e70838e5dc17..0ba0cf8dcdbc 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -131,6 +131,7 @@ struct cxl_endpoint_dvsec_info { * @next_persistent_bytes: persistent capacity change pending device reset * @component_reg_phys: register base of component registers * @info: Cached DVSEC information about the device. + * @serial: PCIe Device Serial Number * @mbox_send: @dev specific transport for transmitting mailbox commands * @wait_media_ready: @dev specific method to await media ready * @@ -164,6 +165,7 @@ struct cxl_dev_state { resource_size_t component_reg_phys; struct cxl_endpoint_dvsec_info info; + u64 serial; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); int (*wait_media_ready)(struct cxl_dev_state *cxlds); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index c01bd44d11c4..8a7267d116b7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -567,6 +567,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->serial = pci_get_dsn(pdev); cxlds->cxl_dvsec = pci_find_dvsec_capability( pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); if (!cxlds->cxl_dvsec) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 3af3f94de0c3..36ef337c775c 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -268,6 +268,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; From cf1f6877b088cd9ddeb5f3db8ade3a61e3a3f9eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:31:24 -0800 Subject: [PATCH 31/48] cxl/memdev: Add numa_node attribute While CXL memory targets will have their own memory target node, individual memory devices may be affinitized like other PCI devices. Emit that attribute for memdevs. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164298428430.3018233.16409089892707993289.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-cxl | 9 +++++++++ drivers/cxl/core/memdev.c | 17 +++++++++++++++++ tools/testing/cxl/test/cxl.c | 1 + 3 files changed, 27 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 87c0e5e65322..0b51cfec0c66 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -34,6 +34,15 @@ Description: capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2 Memory Device PCIe Capabilities and Extended Capabilities. +What: /sys/bus/cxl/devices/memX/numa_node +Date: January, 2022 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) If NUMA is enabled and the platform has affinitized the + host PCI device for this memory device, emit the CPU node + affinity for this device. + What: /sys/bus/cxl/devices/*/devtype Date: June, 2021 KernelVersion: v5.14 diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 1e574b052583..b2773664e407 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -99,11 +99,19 @@ static ssize_t serial_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(serial); +static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", dev_to_node(dev)); +} +static DEVICE_ATTR_RO(numa_node); + static struct attribute *cxl_memdev_attributes[] = { &dev_attr_serial.attr, &dev_attr_firmware_version.attr, &dev_attr_payload_max.attr, &dev_attr_label_storage_size.attr, + &dev_attr_numa_node.attr, NULL, }; @@ -117,8 +125,17 @@ static struct attribute *cxl_memdev_ram_attributes[] = { NULL, }; +static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr) + return 0; + return a->mode; +} + static struct attribute_group cxl_memdev_attribute_group = { .attrs = cxl_memdev_attributes, + .is_visible = cxl_memdev_visible, }; static struct attribute_group cxl_memdev_ram_attribute_group = { diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 40ed567952e6..cd2f20f2707f 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -583,6 +583,7 @@ static __init int cxl_test_init(void) if (!pdev) goto err_mem; pdev->dev.parent = &port->dev; + set_dev_node(&pdev->dev, i % 2); rc = platform_device_add(pdev); if (rc) { From 2703c16c75aea142c3079ec34ae2262c0557ef7f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 4 Feb 2022 07:08:40 -0800 Subject: [PATCH 32/48] cxl/core/port: Add switch port enumeration So far the platorm level CXL resources have been enumerated by the cxl_acpi driver, and cxl_pci has gathered all the pre-requisite information it needs to fire up a cxl_mem driver. However, the first thing the cxl_mem driver will be tasked to do is validate that all the PCIe Switches in its ancestry also have CXL capabilities and an CXL.mem link established. Provide a common mechanism for a CXL.mem endpoint driver to enumerate all the ancestor CXL ports in the topology and validate CXL.mem connectivity. Multiple endpoints may end up racing to establish a shared port in the topology. This race is resolved via taking the device-lock on a parent CXL Port before establishing a new child. The winner of the race establishes the port, the loser simply registers its interest in the port via 'struct cxl_ep' place-holder reference. At endpoint teardown the same parent port lock is taken as 'struct cxl_ep' references are deleted. Last endpoint to drop its reference unregisters the port. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164398731146.902644.1029761300481366248.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 17 +- drivers/cxl/core/port.c | 431 +++++++++++++++++++++++++++++++++++++++- drivers/cxl/cxl.h | 19 ++ 3 files changed, 440 insertions(+), 27 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 683f2ca32c97..7bd53dc691ec 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -130,21 +130,6 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, return 0; } -static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev) -{ - struct cxl_dport *dport; - - cxl_device_lock(&port->dev); - list_for_each_entry(dport, &port->dports, list) - if (dport->dport == dev) { - cxl_device_unlock(&port->dev); - return dport; - } - - cxl_device_unlock(&port->dev); - return NULL; -} - __mock struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) { @@ -175,7 +160,7 @@ static int add_host_bridge_uport(struct device *match, void *arg) if (!bridge) return 0; - dport = find_dport_by_dev(root_port, match); + dport = cxl_find_dport_by_dev(root_port, match); if (!dport) { dev_dbg(host, "host bridge expected and not found\n"); return 0; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 62b9f5dc64b5..c5779c982c80 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "core.h" @@ -265,10 +266,24 @@ struct cxl_decoder *to_cxl_decoder(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL); +static void cxl_ep_release(struct cxl_ep *ep) +{ + if (!ep) + return; + list_del(&ep->list); + put_device(ep->ep); + kfree(ep); +} + static void cxl_port_release(struct device *dev) { struct cxl_port *port = to_cxl_port(dev); + struct cxl_ep *ep, *_e; + cxl_device_lock(dev); + list_for_each_entry_safe(ep, _e, &port->endpoints, list) + cxl_ep_release(ep); + cxl_device_unlock(dev); ida_free(&cxl_port_ida, port->id); kfree(port); } @@ -359,6 +374,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, port->component_reg_phys = component_reg_phys; ida_init(&port->decoder_ida); INIT_LIST_HEAD(&port->dports); + INIT_LIST_HEAD(&port->endpoints); device_initialize(dev); device_set_pm_not_required(dev); @@ -457,25 +473,36 @@ int devm_cxl_register_pci_bus(struct device *host, struct device *uport, } EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL); +static bool dev_is_cxl_root_child(struct device *dev) +{ + struct cxl_port *port, *parent; + + if (!is_cxl_port(dev)) + return false; + + port = to_cxl_port(dev); + if (is_cxl_root(port)) + return false; + + parent = to_cxl_port(port->dev.parent); + if (is_cxl_root(parent)) + return true; + + return false; +} + /* Find a 2nd level CXL port that has a dport that is an ancestor of @match */ static int match_root_child(struct device *dev, const void *match) { const struct device *iter = NULL; - struct cxl_port *port, *parent; struct cxl_dport *dport; + struct cxl_port *port; - if (!is_cxl_port(dev)) + if (!dev_is_cxl_root_child(dev)) return 0; port = to_cxl_port(dev); - if (is_cxl_root(port)) - return 0; - - parent = to_cxl_port(port->dev.parent); - if (!is_cxl_root(parent)) - return 0; - - cxl_device_lock(&port->dev); + cxl_device_lock(dev); list_for_each_entry(dport, &port->dports, list) { iter = match; while (iter) { @@ -485,7 +512,7 @@ static int match_root_child(struct device *dev, const void *match) } } out: - cxl_device_unlock(&port->dev); + cxl_device_unlock(dev); return !!iter; } @@ -642,6 +669,388 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL); +static struct cxl_ep *find_ep(struct cxl_port *port, struct device *ep_dev) +{ + struct cxl_ep *ep; + + device_lock_assert(&port->dev); + list_for_each_entry(ep, &port->endpoints, list) + if (ep->ep == ep_dev) + return ep; + return NULL; +} + +static int add_ep(struct cxl_port *port, struct cxl_ep *new) +{ + struct cxl_ep *dup; + + cxl_device_lock(&port->dev); + if (port->dead) { + cxl_device_unlock(&port->dev); + return -ENXIO; + } + dup = find_ep(port, new->ep); + if (!dup) + list_add_tail(&new->list, &port->endpoints); + cxl_device_unlock(&port->dev); + + return dup ? -EEXIST : 0; +} + +/** + * cxl_add_ep - register an endpoint's interest in a port + * @port: a port in the endpoint's topology ancestry + * @ep_dev: device representing the endpoint + * + * Intermediate CXL ports are scanned based on the arrival of endpoints. + * When those endpoints depart the port can be destroyed once all + * endpoints that care about that port have been removed. + */ +static int cxl_add_ep(struct cxl_port *port, struct device *ep_dev) +{ + struct cxl_ep *ep; + int rc; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + INIT_LIST_HEAD(&ep->list); + ep->ep = get_device(ep_dev); + + rc = add_ep(port, ep); + if (rc) + cxl_ep_release(ep); + return rc; +} + +struct cxl_find_port_ctx { + const struct device *dport_dev; + const struct cxl_port *parent_port; +}; + +static int match_port_by_dport(struct device *dev, const void *data) +{ + const struct cxl_find_port_ctx *ctx = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + if (ctx->parent_port && dev->parent != &ctx->parent_port->dev) + return 0; + + port = to_cxl_port(dev); + return cxl_find_dport_by_dev(port, ctx->dport_dev) != NULL; +} + +static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx) +{ + struct device *dev; + + if (!ctx->dport_dev) + return NULL; + + dev = bus_find_device(&cxl_bus_type, NULL, ctx, match_port_by_dport); + if (dev) + return to_cxl_port(dev); + return NULL; +} + +static struct cxl_port *find_cxl_port(struct device *dport_dev) +{ + struct cxl_find_port_ctx ctx = { + .dport_dev = dport_dev, + }; + + return __find_cxl_port(&ctx); +} + +static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, + struct device *dport_dev) +{ + struct cxl_find_port_ctx ctx = { + .dport_dev = dport_dev, + .parent_port = parent_port, + }; + + return __find_cxl_port(&ctx); +} + +/* + * All users of grandparent() are using it to walk PCIe-like swich port + * hierarchy. A PCIe switch is comprised of a bridge device representing the + * upstream switch port and N bridges representing downstream switch ports. When + * bridges stack the grand-parent of a downstream switch port is another + * downstream switch port in the immediate ancestor switch. + */ +static struct device *grandparent(struct device *dev) +{ + if (dev && dev->parent) + return dev->parent->parent; + return NULL; +} + +/* + * The natural end of life of a non-root 'cxl_port' is when its parent port goes + * through a ->remove() event ("top-down" unregistration). The unnatural trigger + * for a port to be unregistered is when all memdevs beneath that port have gone + * through ->remove(). This "bottom-up" removal selectively removes individual + * child ports manually. This depends on devm_cxl_add_port() to not change is + * devm action registration order. + */ +static void delete_switch_port(struct cxl_port *port, struct list_head *dports) +{ + struct cxl_dport *dport, *_d; + + list_for_each_entry_safe(dport, _d, dports, list) { + devm_release_action(&port->dev, cxl_dport_unlink, dport); + devm_release_action(&port->dev, cxl_dport_remove, dport); + devm_kfree(&port->dev, dport); + } + devm_release_action(port->dev.parent, cxl_unlink_uport, port); + devm_release_action(port->dev.parent, unregister_port, port); +} + +static void cxl_detach_ep(void *data) +{ + struct cxl_memdev *cxlmd = data; + struct device *iter; + + for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) { + struct device *dport_dev = grandparent(iter); + struct cxl_port *port, *parent_port; + LIST_HEAD(reap_dports); + struct cxl_ep *ep; + + if (!dport_dev) + break; + + port = find_cxl_port(dport_dev); + if (!port || is_cxl_root(port)) { + put_device(&port->dev); + continue; + } + + parent_port = to_cxl_port(port->dev.parent); + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + /* + * The bottom-up race to delete the port lost to a + * top-down port disable, give up here, because the + * parent_port ->remove() will have cleaned up all + * descendants. + */ + cxl_device_unlock(&parent_port->dev); + put_device(&port->dev); + continue; + } + + cxl_device_lock(&port->dev); + ep = find_ep(port, &cxlmd->dev); + dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", + ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); + cxl_ep_release(ep); + if (ep && !port->dead && list_empty(&port->endpoints) && + !is_cxl_root(parent_port)) { + /* + * This was the last ep attached to a dynamically + * enumerated port. Block new cxl_add_ep() and garbage + * collect the port. + */ + port->dead = true; + list_splice_init(&port->dports, &reap_dports); + } + cxl_device_unlock(&port->dev); + + if (!list_empty(&reap_dports)) { + dev_dbg(&cxlmd->dev, "delete %s\n", + dev_name(&port->dev)); + delete_switch_port(port, &reap_dports); + } + put_device(&port->dev); + cxl_device_unlock(&parent_port->dev); + } +} + +static resource_size_t find_component_registers(struct device *dev) +{ + struct cxl_register_map map; + struct pci_dev *pdev; + + /* + * Theoretically, CXL component registers can be hosted on a + * non-PCI device, in practice, only cxl_test hits this case. + */ + if (!dev_is_pci(dev)) + return CXL_RESOURCE_NONE; + + pdev = to_pci_dev(dev); + + cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + return cxl_regmap_to_base(pdev, &map); +} + +static int add_port_attach_ep(struct cxl_memdev *cxlmd, + struct device *uport_dev, + struct device *dport_dev) +{ + struct device *dparent = grandparent(dport_dev); + struct cxl_port *port, *parent_port = NULL; + resource_size_t component_reg_phys; + int rc; + + if (!dparent) { + /* + * The iteration reached the topology root without finding the + * CXL-root 'cxl_port' on a previous iteration, fail for now to + * be re-probed after platform driver attaches. + */ + dev_dbg(&cxlmd->dev, "%s is a root dport\n", + dev_name(dport_dev)); + return -ENXIO; + } + + parent_port = find_cxl_port(dparent); + if (!parent_port) { + /* iterate to create this parent_port */ + return -EAGAIN; + } + + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_warn(&cxlmd->dev, + "port %s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev)); + port = ERR_PTR(-ENXIO); + goto out; + } + + port = find_cxl_port_at(parent_port, dport_dev); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_port); + if (!IS_ERR(port)) + get_device(&port->dev); + } +out: + cxl_device_unlock(&parent_port->dev); + + if (IS_ERR(port)) + rc = PTR_ERR(port); + else { + dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport)); + rc = cxl_add_ep(port, &cxlmd->dev); + if (rc == -EEXIST) { + /* + * "can't" happen, but this error code means + * something to the caller, so translate it. + */ + rc = -ENXIO; + } + put_device(&port->dev); + } + + put_device(&parent_port->dev); + return rc; +} + +int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) +{ + struct device *dev = &cxlmd->dev; + struct device *iter; + int rc; + + rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd); + if (rc) + return rc; + + /* + * Scan for and add all cxl_ports in this device's ancestry. + * Repeat until no more ports are added. Abort if a port add + * attempt fails. + */ +retry: + for (iter = dev; iter; iter = grandparent(iter)) { + struct device *dport_dev = grandparent(iter); + struct device *uport_dev; + struct cxl_port *port; + + if (!dport_dev) + return 0; + + uport_dev = dport_dev->parent; + if (!uport_dev) { + dev_warn(dev, "at %s no parent for dport: %s\n", + dev_name(iter), dev_name(dport_dev)); + return -ENXIO; + } + + dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n", + dev_name(iter), dev_name(dport_dev), + dev_name(uport_dev)); + port = find_cxl_port(dport_dev); + if (port) { + dev_dbg(&cxlmd->dev, + "found already registered port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport)); + rc = cxl_add_ep(port, &cxlmd->dev); + + /* + * If the endpoint already exists in the port's list, + * that's ok, it was added on a previous pass. + * Otherwise, retry in add_port_attach_ep() after taking + * the parent_port lock as the current port may be being + * reaped. + */ + if (rc && rc != -EEXIST) { + put_device(&port->dev); + return rc; + } + + /* Any more ports to add between this one and the root? */ + if (!dev_is_cxl_root_child(&port->dev)) { + put_device(&port->dev); + continue; + } + + put_device(&port->dev); + return 0; + } + + rc = add_port_attach_ep(cxlmd, uport_dev, dport_dev); + /* port missing, try to add parent */ + if (rc == -EAGAIN) + continue; + /* failed to add ep or port */ + if (rc) + return rc; + /* port added, new descendants possible, start over */ + goto retry; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL); + +struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, + const struct device *dev) +{ + struct cxl_dport *dport; + + cxl_device_lock(&port->dev); + list_for_each_entry(dport, &port->dports, list) + if (dport->dport == dev) { + cxl_device_unlock(&port->dev); + return dport; + } + + cxl_device_unlock(&port->dev); + return NULL; +} +EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL); + static int decoder_populate_targets(struct cxl_decoder *cxld, struct cxl_port *port, int *target_map) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 89fbf49ebf98..de4fbe7cbf42 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -262,8 +262,10 @@ struct cxl_nvdimm { * @uport: PCI or platform device implementing the upstream port capability * @id: id for port device-name * @dports: cxl_dport instances referenced by decoders + * @endpoints: cxl_ep instances, endpoints that are a descendant of this port * @decoder_ida: allocator for decoder ids * @component_reg_phys: component register capability base address (optional) + * @dead: last ep has been removed, force port re-creation * @depth: How deep this port is relative to the root. depth 0 is the root. */ struct cxl_port { @@ -271,8 +273,10 @@ struct cxl_port { struct device *uport; int id; struct list_head dports; + struct list_head endpoints; struct ida decoder_ida; resource_size_t component_reg_phys; + bool dead; unsigned int depth; }; @@ -292,6 +296,16 @@ struct cxl_dport { struct list_head list; }; +/** + * struct cxl_ep - track an endpoint's interest in a port + * @ep: device that hosts a generic CXL endpoint (expander or accelerator) + * @list: node on port->endpoints list + */ +struct cxl_ep { + struct device *ep; + struct list_head list; +}; + /* * The platform firmware device hosting the root is also the top of the * CXL port topology. All other CXL ports have another CXL port as their @@ -313,9 +327,14 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port); struct cxl_port *find_cxl_root(struct device *dev); +int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); + struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, resource_size_t component_reg_phys); +struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, + const struct device *dev); + struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); bool is_cxl_decoder(struct device *dev); From 8dd2bc0f8e02d39bd80851ca787bcbdb7d495e69 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Fri, 4 Feb 2022 07:18:31 -0800 Subject: [PATCH 33/48] cxl/mem: Add the cxl_mem driver At this point the subsystem can enumerate all CXL ports (CXL.mem decode resources in upstream switch ports and host bridges) in a system. The last mile is connecting those ports to endpoints. The cxl_mem driver connects an endpoint device to the platform CXL.mem protoctol decode-topology. At ->probe() time it walks its device-topology-ancestry and adds a CXL Port object at every Upstream Port hop until it gets to CXL root. The CXL root object is only present after a platform firmware driver registers platform CXL resources. For ACPI based platform this is managed by the ACPI0017 device and the cxl_acpi driver. The ports are registered such that disabling a given port automatically unregisters all descendant ports, and the chain can only be registered after the root is established. Given ACPI device scanning may run asynchronously compared to PCI device scanning the root driver is tasked with rescanning the bus after the root successfully probes. Conversely if any ports in a chain between the root and an endpoint becomes disconnected it subsequently triggers the endpoint to unregister. Given lock depenedencies the endpoint unregistration happens in a workqueue asynchronously. If userspace cares about synchronizing delayed work after port events the /sys/bus/cxl/flush attribute is available for that purpose. Reported-by: Randy Dunlap Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron [djbw: clarify changelog, rework hotplug support] Link: https://lore.kernel.org/r/164398782997.903003.9725273241627693186.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- Documentation/ABI/testing/sysfs-bus-cxl | 9 + .../driver-api/cxl/memory-devices.rst | 9 + drivers/cxl/Kconfig | 16 ++ drivers/cxl/Makefile | 2 + drivers/cxl/acpi.c | 3 +- drivers/cxl/core/memdev.c | 16 ++ drivers/cxl/core/port.c | 105 +++++++- drivers/cxl/cxl.h | 6 + drivers/cxl/cxlmem.h | 8 + drivers/cxl/mem.c | 228 ++++++++++++++++++ drivers/cxl/port.c | 12 + tools/testing/cxl/Kbuild | 6 + tools/testing/cxl/mock_mem.c | 10 + 13 files changed, 425 insertions(+), 5 deletions(-) create mode 100644 drivers/cxl/mem.c create mode 100644 tools/testing/cxl/mock_mem.c diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 0b51cfec0c66..7c2b846521f3 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -1,3 +1,12 @@ +What: /sys/bus/cxl/flush +Date: Januarry, 2022 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) If userspace manually unbinds a port the kernel schedules + all descendant memdevs for unbind. Writing '1' to this attribute + flushes that work. + What: /sys/bus/cxl/devices/memX/firmware_version Date: December, 2020 KernelVersion: v5.12 diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index 3498d38d7cbd..db476bb170b6 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -325,6 +325,9 @@ CXL Memory Device .. kernel-doc:: drivers/cxl/pci.c :internal: +.. kernel-doc:: drivers/cxl/mem.c + :doc: cxl mem + CXL Port -------- .. kernel-doc:: drivers/cxl/port.c @@ -344,6 +347,12 @@ CXL Core .. kernel-doc:: drivers/cxl/core/port.c :identifiers: +.. kernel-doc:: drivers/cxl/core/pci.c + :doc: cxl core pci + +.. kernel-doc:: drivers/cxl/core/pci.c + :identifiers: + .. kernel-doc:: drivers/cxl/core/pmem.c :doc: cxl pmem diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 4f4f7587f6ca..b88ab956bb7c 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -78,6 +78,22 @@ config CXL_PMEM If unsure say 'm'. +config CXL_MEM + tristate "CXL: Memory Expansion" + depends on CXL_PCI + default CXL_BUS + help + The CXL.mem protocol allows a device to act as a provider of "System + RAM" and/or "Persistent Memory" that is fully coherent as if the + memory were attached to the typical CPU memory controller. This is + known as HDM "Host-managed Device Memory". + + Say 'y/m' to enable a driver that will attach to CXL.mem devices for + memory expansion and control of HDM. See Chapter 9.13 in the CXL 2.0 + specification for a detailed description of HDM. + + If unsure say 'm'. + config CXL_PORT default CXL_BUS tristate diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index 56fcac2323cb..ce267ef11d93 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,10 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += core/ obj-$(CONFIG_CXL_PCI) += cxl_pci.o +obj-$(CONFIG_CXL_MEM) += cxl_mem.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o obj-$(CONFIG_CXL_PORT) += cxl_port.o +cxl_mem-y := mem.o cxl_pci-y := pci.o cxl_acpi-y := acpi.o cxl_pmem-y := pmem.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 7bd53dc691ec..d8295572bde9 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -314,7 +314,8 @@ static int cxl_acpi_probe(struct platform_device *pdev) if (rc < 0) return rc; - return 0; + /* In case PCI is scanned before ACPI re-trigger memdev attach */ + return cxl_bus_rescan(); } static const struct acpi_device_id cxl_acpi_ids[] = { diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index b2773664e407..1f76b28f9826 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -162,6 +162,12 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +bool is_cxl_memdev(struct device *dev) +{ + return dev->type == &cxl_memdev_type; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL); + /** * set_exclusive_cxl_commands() - atomically disable user cxl commands * @cxlds: The device state to operate on @@ -213,6 +219,15 @@ static void cxl_memdev_unregister(void *_cxlmd) put_device(dev); } +static void detach_memdev(struct work_struct *work) +{ + struct cxl_memdev *cxlmd; + + cxlmd = container_of(work, typeof(*cxlmd), detach_work); + device_release_driver(&cxlmd->dev); + put_device(&cxlmd->dev); +} + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops) { @@ -237,6 +252,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev->devt = MKDEV(cxl_mem_major, cxlmd->id); dev->type = &cxl_memdev_type; device_set_pm_not_required(dev); + INIT_WORK(&cxlmd->detach_work, detach_memdev); cdev = &cxlmd->cdev; cdev_init(cdev, fops); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c5779c982c80..f460460b12b3 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ #include +#include #include #include #include @@ -46,6 +47,8 @@ static int cxl_device_id(struct device *dev) return CXL_DEVICE_ROOT; return CXL_DEVICE_PORT; } + if (is_cxl_memdev(dev)) + return CXL_DEVICE_MEMORY_EXPANDER; return 0; } @@ -318,8 +321,10 @@ static void unregister_port(void *_port) { struct cxl_port *port = _port; - if (!is_cxl_root(port)) + if (!is_cxl_root(port)) { device_lock_assert(port->dev.parent); + port->uport = NULL; + } device_unregister(&port->dev); } @@ -410,7 +415,9 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, if (parent_port) port->depth = parent_port->depth + 1; dev = &port->dev; - if (parent_port) + if (is_cxl_memdev(uport)) + rc = dev_set_name(dev, "endpoint%d", port->id); + else if (parent_port) rc = dev_set_name(dev, "port%d", port->id); else rc = dev_set_name(dev, "root%d", port->id); @@ -790,6 +797,38 @@ static struct device *grandparent(struct device *dev) return NULL; } +static void delete_endpoint(void *data) +{ + struct cxl_memdev *cxlmd = data; + struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev); + struct cxl_port *parent_port; + struct device *parent; + + parent_port = cxl_mem_find_port(cxlmd); + if (!parent_port) + return; + parent = &parent_port->dev; + + cxl_device_lock(parent); + if (parent->driver && endpoint->uport) { + devm_release_action(parent, cxl_unlink_uport, endpoint); + devm_release_action(parent, unregister_port, endpoint); + } + cxl_device_unlock(parent); + put_device(parent); + put_device(&endpoint->dev); +} + +int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) +{ + struct device *dev = &cxlmd->dev; + + get_device(&endpoint->dev); + dev_set_drvdata(dev, endpoint); + return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); + /* * The natural end of life of a non-root 'cxl_port' is when its parent port goes * through a ->remove() event ("top-down" unregistration). The unnatural trigger @@ -1034,6 +1073,12 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL); +struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd) +{ + return find_cxl_port(grandparent(&cxlmd->dev)); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL); + struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dev) { @@ -1352,12 +1397,54 @@ static void cxl_bus_remove(struct device *dev) cxl_nested_unlock(dev); } +static struct workqueue_struct *cxl_bus_wq; + +int cxl_bus_rescan(void) +{ + return bus_rescan_devices(&cxl_bus_type); +} +EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL); + +bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) +{ + return queue_work(cxl_bus_wq, &cxlmd->detach_work); +} +EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); + +/* for user tooling to ensure port disable work has completed */ +static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count) +{ + if (sysfs_streq(buf, "1")) { + flush_workqueue(cxl_bus_wq); + return count; + } + + return -EINVAL; +} + +static BUS_ATTR_WO(flush); + +static struct attribute *cxl_bus_attributes[] = { + &bus_attr_flush.attr, + NULL, +}; + +static struct attribute_group cxl_bus_attribute_group = { + .attrs = cxl_bus_attributes, +}; + +static const struct attribute_group *cxl_bus_attribute_groups[] = { + &cxl_bus_attribute_group, + NULL, +}; + struct bus_type cxl_bus_type = { .name = "cxl", .uevent = cxl_bus_uevent, .match = cxl_bus_match, .probe = cxl_bus_probe, .remove = cxl_bus_remove, + .bus_groups = cxl_bus_attribute_groups, }; EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL); @@ -1371,12 +1458,21 @@ static __init int cxl_core_init(void) if (rc) return rc; + cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0); + if (!cxl_bus_wq) { + rc = -ENOMEM; + goto err_wq; + } + rc = bus_register(&cxl_bus_type); if (rc) - goto err; + goto err_bus; + return 0; -err: +err_bus: + destroy_workqueue(cxl_bus_wq); +err_wq: cxl_memdev_exit(); cxl_mbox_exit(); return rc; @@ -1385,6 +1481,7 @@ static __init int cxl_core_init(void) static void cxl_core_exit(void) { bus_unregister(&cxl_bus_type); + destroy_workqueue(cxl_bus_wq); cxl_memdev_exit(); cxl_mbox_exit(); } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index de4fbe7cbf42..f5e5b4ac8228 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -328,6 +328,9 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, struct cxl_port *parent_port); struct cxl_port *find_cxl_root(struct device *dev); int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); +int cxl_bus_rescan(void); +struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd); +bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd); struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, @@ -345,6 +348,8 @@ struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); +int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint); + struct cxl_hdm; struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port); int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm); @@ -377,6 +382,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); #define CXL_DEVICE_NVDIMM 2 #define CXL_DEVICE_PORT 3 #define CXL_DEVICE_ROOT 4 +#define CXL_DEVICE_MEMORY_EXPANDER 5 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 0ba0cf8dcdbc..5d33ce24fe09 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,12 +34,14 @@ * @dev: driver core device object * @cdev: char dev core object for ioctl operations * @cxlds: The device state backing this device + * @detach_work: active memdev lost a port in its ancestry * @id: id number of this memdev instance. */ struct cxl_memdev { struct device dev; struct cdev cdev; struct cxl_dev_state *cxlds; + struct work_struct detach_work; int id; }; @@ -48,6 +50,12 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) return container_of(dev, struct cxl_memdev, dev); } +bool is_cxl_memdev(struct device *dev); +static inline bool is_cxl_endpoint(struct cxl_port *port) +{ + return is_cxl_memdev(port->uport); +} + struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); /** diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c new file mode 100644 index 000000000000..49a4b1c47299 --- /dev/null +++ b/drivers/cxl/mem.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "cxlpci.h" + +/** + * DOC: cxl mem + * + * CXL memory endpoint devices and switches are CXL capable devices that are + * participating in CXL.mem protocol. Their functionality builds on top of the + * CXL.io protocol that allows enumerating and configuring components via + * standard PCI mechanisms. + * + * The cxl_mem driver owns kicking off the enumeration of this CXL.mem + * capability. With the detection of a CXL capable endpoint, the driver will + * walk up to find the platform specific port it is connected to, and determine + * if there are intervening switches in the path. If there are switches, a + * secondary action is to enumerate those (implemented in cxl_core). Finally the + * cxl_mem driver adds the device it is bound to as a CXL endpoint-port for use + * in higher level operations. + */ + +static int wait_for_media(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + int rc; + + if (!info->mem_enabled) + return -EBUSY; + + rc = cxlds->wait_media_ready(cxlds); + if (rc) + return rc; + + /* + * We know the device is active, and enabled, if any ranges are non-zero + * we'll need to check later before adding the port since that owns the + * HDM decoder registers. + */ + return 0; +} + +static int create_endpoint(struct cxl_memdev *cxlmd, + struct cxl_port *parent_port) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_port *endpoint; + + endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev, + cxlds->component_reg_phys, parent_port); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev)); + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return cxl_endpoint_autoremove(cxlmd, endpoint); +} + +/** + * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * + * Additionally, enables global HDM decoding. Warning: don't call this outside + * of probe. Once probe is complete, the port driver owns all access to the HDM + * decoder registers. + * + * Returns: false if DVSEC Ranges are being used instead of HDM + * decoders, or if it can not be determined if DVSEC Ranges are in use. + * Otherwise, returns true. + */ +__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + struct cxl_register_map map; + struct cxl_component_reg_map *cmap = &map.component_map; + bool global_enable, do_hdm_init = false; + void __iomem *crb; + u32 global_ctrl; + + /* map hdm decoder */ + crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_dbg(cxlds->dev, "Failed to map component registers\n"); + return false; + } + + cxl_probe_component_regs(cxlds->dev, crb, cmap); + if (!cmap->hdm_decoder.valid) { + dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); + goto out; + } + + global_ctrl = readl(crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; + if (!global_enable && info->ranges) { + dev_dbg(cxlds->dev, + "DVSEC ranges already programmed and HDM decoders not enabled.\n"); + goto out; + } + + do_hdm_init = true; + + /* + * Permanently (for this boot at least) opt the device into HDM + * operation. Individual HDM decoders still need to be enabled after + * this point. + */ + if (!global_enable) { + dev_dbg(cxlds->dev, "Enabling HDM decode\n"); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + } + +out: + iounmap(crb); + return do_hdm_init; +} + +static int cxl_mem_probe(struct device *dev) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_port *parent_port; + int rc; + + /* + * Someone is trying to reattach this device after it lost its port + * connection (an endpoint port previously registered by this memdev was + * disabled). This racy check is ok because if the port is still gone, + * no harm done, and if the port hierarchy comes back it will re-trigger + * this probe. Port rescan and memdev detach work share the same + * single-threaded workqueue. + */ + if (work_pending(&cxlmd->detach_work)) + return -EBUSY; + + rc = wait_for_media(cxlmd); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } + + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ + if (!cxl_dvsec_decode_init(cxlds)) { + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + int i; + + /* */ + for (i = 0; i < 2; i++) { + u64 base, size; + + /* + * Give a nice warning to the user that BIOS has really + * botched things for them if it didn't place DVSEC + * ranges in the memory map. + */ + base = info->dvsec_range[i].start; + size = range_len(&info->dvsec_range[i]); + if (size && !region_intersects(base, size, + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE)) { + dev_err(dev, + "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n", + base, base + size - 1); + } + } + dev_err(dev, + "Active DVSEC range registers in use. Will not bind.\n"); + return -EBUSY; + } + + rc = devm_cxl_enumerate_ports(cxlmd); + if (rc) + return rc; + + parent_port = cxl_mem_find_port(cxlmd); + if (!parent_port) { + dev_err(dev, "CXL port topology not found\n"); + return -ENXIO; + } + + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_err(dev, "CXL port topology %s not enabled\n", + dev_name(&parent_port->dev)); + rc = -ENXIO; + goto out; + } + + rc = create_endpoint(cxlmd, parent_port); +out: + cxl_device_unlock(&parent_port->dev); + put_device(&parent_port->dev); + return rc; +} + +static struct cxl_driver cxl_mem_driver = { + .name = "cxl_mem", + .probe = cxl_mem_probe, + .id = CXL_DEVICE_MEMORY_EXPANDER, +}; + +module_cxl_driver(cxl_mem_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); +MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); +/* + * create_endpoint() wants to validate port driver attach immediately after + * endpoint registration. + */ +MODULE_SOFTDEP("pre: cxl_port"); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 5a1aec28dc46..4d4e23b9adff 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -25,12 +25,24 @@ * PCIe topology. */ +static void schedule_detach(void *cxlmd) +{ + schedule_cxl_memdev_detach(cxlmd); +} + static int cxl_port_probe(struct device *dev) { struct cxl_port *port = to_cxl_port(dev); struct cxl_hdm *cxlhdm; int rc; + if (is_cxl_endpoint(port)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + + get_device(&cxlmd->dev); + return devm_add_action_or_reset(dev, schedule_detach, cxlmd); + } + rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) return rc; diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 27ae13e23e79..82e49ab0937d 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -31,6 +31,12 @@ obj-m += cxl_port.o cxl_port-y := $(CXL_SRC)/port.o cxl_port-y += config_check.o +obj-m += cxl_mem.o + +cxl_mem-y := $(CXL_SRC)/mem.o +cxl_mem-y += mock_mem.o +cxl_mem-y += config_check.o + obj-m += cxl_core.o cxl_core-y := $(CXL_CORE_SRC)/port.o diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c new file mode 100644 index 000000000000..d1dec5845139 --- /dev/null +++ b/tools/testing/cxl/mock_mem.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#include + +struct cxl_dev_state; +bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +{ + return true; +} From 8aea0ef19fde030f983aba0e7ec5bcf10880a6fe Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:31:41 -0800 Subject: [PATCH 34/48] cxl/core: Move target_list out of base decoder attributes In preparation for introducing endpoint decoder objects, move the target_list attribute out of the common set since it has no meaning for endpoint decoders. Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164298430100.3018233.4715072508880290970.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index f460460b12b3..359d4303de9a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -184,7 +184,6 @@ static struct attribute *cxl_decoder_base_attrs[] = { &dev_attr_start.attr, &dev_attr_size.attr, &dev_attr_locked.attr, - &dev_attr_target_list.attr, NULL, }; @@ -197,6 +196,7 @@ static struct attribute *cxl_decoder_root_attrs[] = { &dev_attr_cap_ram.attr, &dev_attr_cap_type2.attr, &dev_attr_cap_type3.attr, + &dev_attr_target_list.attr, NULL, }; @@ -213,6 +213,7 @@ static const struct attribute_group *cxl_decoder_root_attribute_groups[] = { static struct attribute *cxl_decoder_switch_attrs[] = { &dev_attr_target_type.attr, + &dev_attr_target_list.attr, NULL, }; From 9b71e1c9c3aaae5079f5e267785b6f035c5f23da Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Wed, 2 Feb 2022 20:02:06 -0800 Subject: [PATCH 35/48] cxl/core/port: Add endpoint decoders Recall that a CXL Port is any object that publishes a CXL HDM Decoder Capability structure. That is Host Bridge and Switches that have been enabled so far. Now, add decoder support to the 'endpoint' CXL Ports registered by the cxl_mem driver. They mostly share the same enumeration as Bridges and Switches, but witout a target list. The target of endpoint decode is device-internal DPA space, not another downstream port. Signed-off-by: Ben Widawsky Reviewed-by: Jonathan Cameron [djbw: clarify changelog, hookup enumeration in the port driver] Link: https://lore.kernel.org/r/164386092069.765089.14895687988217608642.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 8 ++++- drivers/cxl/core/port.c | 65 ++++++++++++++++++++++++++++++++++++----- drivers/cxl/cxl.h | 1 + drivers/cxl/port.c | 17 ++++++----- 4 files changed, 74 insertions(+), 17 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 80280db316c0..05b0b292e72d 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -186,6 +186,9 @@ static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map, else cxld->target_type = CXL_DECODER_ACCELERATOR; + if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent))) + return; + target_list.value = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); for (i = 0; i < cxld->interleave_ways; i++) @@ -225,7 +228,10 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) int rc, target_count = cxlhdm->target_count; struct cxl_decoder *cxld; - cxld = cxl_switch_decoder_alloc(port, target_count); + if (is_cxl_endpoint(port)) + cxld = cxl_endpoint_decoder_alloc(port); + else + cxld = cxl_switch_decoder_alloc(port, target_count); if (IS_ERR(cxld)) { dev_warn(&port->dev, "Failed to allocate the decoder\n"); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 359d4303de9a..bc18d339738b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -228,6 +228,22 @@ static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = { NULL, }; +static struct attribute *cxl_decoder_endpoint_attrs[] = { + &dev_attr_target_type.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_endpoint_attribute_group = { + .attrs = cxl_decoder_endpoint_attrs, +}; + +static const struct attribute_group *cxl_decoder_endpoint_attribute_groups[] = { + &cxl_decoder_base_attribute_group, + &cxl_decoder_endpoint_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + static void cxl_decoder_release(struct device *dev) { struct cxl_decoder *cxld = to_cxl_decoder(dev); @@ -237,6 +253,12 @@ static void cxl_decoder_release(struct device *dev) kfree(cxld); } +static const struct device_type cxl_decoder_endpoint_type = { + .name = "cxl_decoder_endpoint", + .release = cxl_decoder_release, + .groups = cxl_decoder_endpoint_attribute_groups, +}; + static const struct device_type cxl_decoder_switch_type = { .name = "cxl_decoder_switch", .release = cxl_decoder_release, @@ -249,6 +271,11 @@ static const struct device_type cxl_decoder_root_type = { .groups = cxl_decoder_root_attribute_groups, }; +static bool is_endpoint_decoder(struct device *dev) +{ + return dev->type == &cxl_decoder_endpoint_type; +} + bool is_root_decoder(struct device *dev) { return dev->type == &cxl_decoder_root_type; @@ -1129,7 +1156,9 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, * cxl_decoder_alloc - Allocate a new CXL decoder * @port: owning port of this decoder * @nr_targets: downstream targets accessible by this decoder. All upstream - * ports and root ports must have at least 1 target. + * ports and root ports must have at least 1 target. Endpoint + * devices will have 0 targets. Callers wishing to register an + * endpoint device should specify 0. * * A port should contain one or more decoders. Each of those decoders enable * some address space for CXL.mem utilization. A decoder is expected to be @@ -1145,7 +1174,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, struct device *dev; int rc = 0; - if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets == 0) + if (nr_targets > CXL_DECODER_MAX_INTERLEAVE) return ERR_PTR(-EINVAL); cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL); @@ -1166,6 +1195,8 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, dev->bus = &cxl_bus_type; if (is_cxl_root(port)) cxld->dev.type = &cxl_decoder_root_type; + else if (is_cxl_endpoint(port)) + cxld->dev.type = &cxl_decoder_endpoint_type; else cxld->dev.type = &cxl_decoder_switch_type; @@ -1215,13 +1246,28 @@ EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL); struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets) { - if (is_cxl_root(port)) + if (is_cxl_root(port) || is_cxl_endpoint(port)) return ERR_PTR(-EINVAL); return cxl_decoder_alloc(port, nr_targets); } EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL); +/** + * cxl_endpoint_decoder_alloc - Allocate an endpoint decoder + * @port: owning port of this decoder + * + * Return: A new cxl decoder to be registered by cxl_decoder_add() + */ +struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port) +{ + if (!is_cxl_endpoint(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, 0); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, CXL); + /** * cxl_decoder_add_locked - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl_decoder_alloc() @@ -1256,12 +1302,15 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) if (cxld->interleave_ways < 1) return -EINVAL; - port = to_cxl_port(cxld->dev.parent); - rc = decoder_populate_targets(cxld, port, target_map); - if (rc) - return rc; - dev = &cxld->dev; + + port = to_cxl_port(cxld->dev.parent); + if (!is_endpoint_decoder(dev)) { + rc = decoder_populate_targets(cxld, port, target_map); + if (rc) + return rc; + } + rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id); if (rc) return rc; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f5e5b4ac8228..990b6670222e 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -346,6 +346,7 @@ struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port); int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 4d4e23b9adff..d420da5fc39c 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -40,16 +40,17 @@ static int cxl_port_probe(struct device *dev) struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); get_device(&cxlmd->dev); - return devm_add_action_or_reset(dev, schedule_detach, cxlmd); + rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); + if (rc) + return rc; + } else { + rc = devm_cxl_port_enumerate_dports(port); + if (rc < 0) + return rc; + if (rc == 1) + return devm_cxl_add_passthrough_decoder(port); } - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - - if (rc == 1) - return devm_cxl_add_passthrough_decoder(port); - cxlhdm = devm_cxl_setup_hdm(port); if (IS_ERR(cxlhdm)) return PTR_ERR(cxlhdm); From f246abd67ff0dcb471ce2361a913b935d4c8ac11 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:31:51 -0800 Subject: [PATCH 36/48] tools/testing/cxl: Mock dvsec_ranges() For test purposes, pretend that that CXL DVSEC ranges are not in active use and the device is ready CXL.mem operation. Link: https://lore.kernel.org/r/164298431119.3018233.17175518196764977542.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/mem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 36ef337c775c..b6b726eff3e2 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -248,6 +248,14 @@ static void label_area_release(void *lsa) vfree(lsa); } +static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info; + + info = &cxlds->info; + info->mem_enabled = true; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -285,6 +293,8 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + mock_validate_dvsec_ranges(cxlds); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); From a4a0ce242fcd7022349212c4e2f795762e6ff050 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:31:56 -0800 Subject: [PATCH 37/48] tools/testing/cxl: Fix root port to host bridge assignment Mocked root-ports are meant to be round-robin assigned to host-bridges. Fixes: 67dcdd4d3b83 ("tools/testing/cxl: Introduce a mocked-up CXL port hierarchy") Link: https://lore.kernel.org/r/164298431629.3018233.14004377108116384485.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index cd2f20f2707f..7e4a0b1ee436 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -558,7 +558,7 @@ static __init int cxl_test_init(void) for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { struct platform_device *bridge = - cxl_host_bridge[i / NR_CXL_ROOT_PORTS]; + cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)]; struct platform_device *pdev; pdev = platform_device_alloc("cxl_root_port", i); From c1915142e8c1168498c8b08cd4e02728d1c33563 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:32:01 -0800 Subject: [PATCH 38/48] tools/testing/cxl: Mock one level of switches The CXL port enumeration process adds intermediate CXL ports that are discovered between "root" CXL ports enumerated by 'cxl_acpi' and endpoints enumerated by 'cxl_pci + cxl_mem'. Test the dynamic discovery of intermediate switch ports in a CXL topology. Link: https://lore.kernel.org/r/164298432189.3018233.13142151550113000967.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 138 ++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 41 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 7e4a0b1ee436..ea88fabc3198 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -11,14 +11,21 @@ #include #include "mock.h" -#define NR_CXL_HOST_BRIDGES 4 +#define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_ROOT_PORTS 2 +#define NR_CXL_SWITCH_PORTS 2 static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; static struct platform_device *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -struct platform_device *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; +static struct platform_device + *cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; +static struct platform_device + *cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * + NR_CXL_SWITCH_PORTS]; +struct platform_device + *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS]; static struct acpi_device acpi0017_mock; static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { @@ -28,12 +35,6 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { [1] = { .handle = &host_bridge[1], }, - [2] = { - .handle = &host_bridge[2], - }, - [3] = { - .handle = &host_bridge[3], - }, }; static bool is_mock_dev(struct device *dev) @@ -71,7 +72,7 @@ static struct { } cfmws0; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[2]; } cfmws1; struct { struct acpi_cedt_cfmws cfmws; @@ -79,7 +80,7 @@ static struct { } cfmws2; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[2]; } cfmws3; } __packed mock_cedt = { .cedt = { @@ -105,22 +106,6 @@ static struct { .uid = 1, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, - .chbs[2] = { - .header = { - .type = ACPI_CEDT_TYPE_CHBS, - .length = sizeof(mock_cedt.chbs[0]), - }, - .uid = 2, - .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, - }, - .chbs[3] = { - .header = { - .type = ACPI_CEDT_TYPE_CHBS, - .length = sizeof(mock_cedt.chbs[0]), - }, - .uid = 3, - .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, - }, .cfmws0 = { .cfmws = { .header = { @@ -142,14 +127,14 @@ static struct { .type = ACPI_CEDT_TYPE_CFMWS, .length = sizeof(mock_cedt.cfmws1), }, - .interleave_ways = 2, + .interleave_ways = 1, .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = 1, - .window_size = SZ_256M * 4, + .window_size = SZ_256M * 2, }, - .target = { 0, 1, 2, 3 }, + .target = { 0, 1, }, }, .cfmws2 = { .cfmws = { @@ -172,14 +157,14 @@ static struct { .type = ACPI_CEDT_TYPE_CFMWS, .length = sizeof(mock_cedt.cfmws3), }, - .interleave_ways = 2, + .interleave_ways = 1, .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = 3, - .window_size = SZ_256M * 4, + .window_size = SZ_256M * 2, }, - .target = { 0, 1, 2, 3 }, + .target = { 0, 1, }, }, }; @@ -332,6 +317,17 @@ static bool is_mock_port(struct device *dev) if (dev == &cxl_root_port[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) + if (dev == &cxl_switch_uport[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) + if (dev == &cxl_switch_dport[i]->dev) + return true; + + if (is_cxl_memdev(dev)) + return is_mock_dev(dev->parent); + return false; } @@ -372,12 +368,6 @@ static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { [1] = { .bus = &mock_pci_bus[1], }, - [2] = { - .bus = &mock_pci_bus[2], - }, - [3] = { - .bus = &mock_pci_bus[3], - }, }; static bool is_mock_bus(struct pci_bus *bus) @@ -446,6 +436,26 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) dev_name(&pdev->dev)); } + for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) { + struct platform_device *pdev = cxl_switch_dport[i]; + struct cxl_dport *dport; + + if (pdev->dev.parent != port->uport) + continue; + + dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); + + if (IS_ERR(dport)) { + dev_err(dev, "failed to add dport: %s (%ld)\n", + dev_name(&pdev->dev), PTR_ERR(dport)); + return PTR_ERR(dport); + } + + dev_dbg(dev, "add dport%d: %s\n", pdev->id, + dev_name(&pdev->dev)); + } + return 0; } @@ -574,15 +584,51 @@ static __init int cxl_test_init(void) cxl_root_port[i] = pdev; } - BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_root_port)); + BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port)); + for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) { + struct platform_device *root_port = cxl_root_port[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_uport", i); + if (!pdev) + goto err_port; + pdev->dev.parent = &root_port->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_uport; + } + cxl_switch_uport[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) { + struct platform_device *uport = + cxl_switch_uport[i % ARRAY_SIZE(cxl_switch_uport)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_dport", i); + if (!pdev) + goto err_port; + pdev->dev.parent = &uport->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_dport; + } + cxl_switch_dport[i] = pdev; + } + + BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport)); for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { - struct platform_device *port = cxl_root_port[i]; + struct platform_device *dport = cxl_switch_dport[i]; struct platform_device *pdev; pdev = alloc_memdev(i); if (!pdev) goto err_mem; - pdev->dev.parent = &port->dev; + pdev->dev.parent = &dport->dev; set_dev_node(&pdev->dev, i % 2); rc = platform_device_add(pdev); @@ -611,6 +657,12 @@ static __init int cxl_test_init(void) err_mem: for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); +err_dport: + for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_dport[i]); +err_uport: + for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_uport[i]); err_port: for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); @@ -633,6 +685,10 @@ static __exit void cxl_test_exit(void) platform_device_unregister(cxl_acpi); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); + for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_dport[i]); + for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_uport[i]); for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) From 7c7d68db0254e1b50d2d80b47774fc605846d49c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:32:07 -0800 Subject: [PATCH 39/48] tools/testing/cxl: Enumerate mock decoders Enumerate 2-decoders per switch port and endpoint in the cxl_test topology. Link: https://lore.kernel.org/r/164298432699.3018233.12131068635065601541.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 120 +++++++++++++++++++++++++++++------ 1 file changed, 99 insertions(+), 21 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index ea88fabc3198..1b36e67dcd7e 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -14,6 +14,7 @@ #define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_ROOT_PORTS 2 #define NR_CXL_SWITCH_PORTS 2 +#define NR_CXL_PORT_DECODERS 2 static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; @@ -406,38 +407,115 @@ static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) return -EOPNOTSUPP; } + +struct target_map_ctx { + int *target_map; + int index; + int target_count; +}; + +static int map_targets(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct target_map_ctx *ctx = data; + + ctx->target_map[ctx->index++] = pdev->id; + + if (ctx->index > ctx->target_count) { + dev_WARN_ONCE(dev, 1, "too many targets found?\n"); + return -ENXIO; + } + + return 0; +} + static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) { + struct cxl_port *port = cxlhdm->port; + struct cxl_port *parent_port = to_cxl_port(port->dev.parent); + int target_count, i; + + if (is_cxl_endpoint(port)) + target_count = 0; + else if (is_cxl_root(parent_port)) + target_count = NR_CXL_ROOT_PORTS; + else + target_count = NR_CXL_SWITCH_PORTS; + + for (i = 0; i < NR_CXL_PORT_DECODERS; i++) { + int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; + struct target_map_ctx ctx = { + .target_map = target_map, + .target_count = target_count, + }; + struct cxl_decoder *cxld; + int rc; + + if (target_count) + cxld = cxl_switch_decoder_alloc(port, target_count); + else + cxld = cxl_endpoint_decoder_alloc(port); + if (IS_ERR(cxld)) { + dev_warn(&port->dev, + "Failed to allocate the decoder\n"); + return PTR_ERR(cxld); + } + + cxld->decoder_range = (struct range) { + .start = 0, + .end = -1, + }; + + cxld->flags = CXL_DECODER_F_ENABLE; + cxld->interleave_ways = min_not_zero(target_count, 1); + cxld->interleave_granularity = SZ_4K; + cxld->target_type = CXL_DECODER_EXPANDER; + + if (target_count) { + rc = device_for_each_child(port->uport, &ctx, + map_targets); + if (rc) { + put_device(&cxld->dev); + return rc; + } + } + + rc = cxl_decoder_add_locked(cxld, target_map); + if (rc) { + put_device(&cxld->dev); + dev_err(&port->dev, "Failed to add decoder\n"); + return rc; + } + + rc = cxl_decoder_autoremove(&port->dev, cxld); + if (rc) + return rc; + dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev)); + } + return 0; } static int mock_cxl_port_enumerate_dports(struct cxl_port *port) { struct device *dev = &port->dev; - int i; + struct platform_device **array; + int i, array_size; - for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { - struct platform_device *pdev = cxl_root_port[i]; - struct cxl_dport *dport; - - if (pdev->dev.parent != port->uport) - continue; - - dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, - CXL_RESOURCE_NONE); - - if (IS_ERR(dport)) { - dev_err(dev, "failed to add dport: %s (%ld)\n", - dev_name(&pdev->dev), PTR_ERR(dport)); - return PTR_ERR(dport); - } - - dev_dbg(dev, "add dport%d: %s\n", pdev->id, - dev_name(&pdev->dev)); + if (port->depth == 1) { + array_size = ARRAY_SIZE(cxl_root_port); + array = cxl_root_port; + } else if (port->depth == 2) { + array_size = ARRAY_SIZE(cxl_switch_dport); + array = cxl_switch_dport; + } else { + dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n", + port->depth); + return -ENXIO; } - for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) { - struct platform_device *pdev = cxl_switch_dport[i]; + for (i = 0; i < array_size; i++) { + struct platform_device *pdev = array[i]; struct cxl_dport *dport; if (pdev->dev.parent != port->uport) From 64cda3ae6bc785960cd1b4f78c19f7ca53e0130b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 23 Jan 2022 16:32:12 -0800 Subject: [PATCH 40/48] tools/testing/cxl: Add a physical_node link Emulate what ACPI does to link a host bridge platform firmware device to device node on the PCI bus. In this case it's just self referencing link, but it otherwise lets the tooling test out its lookup code. Link: https://lore.kernel.org/r/164298433209.3018233.18101085948127163720.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 1b36e67dcd7e..431f2bddf6c8 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -641,7 +641,12 @@ static __init int cxl_test_init(void) platform_device_put(pdev); goto err_bridge; } + cxl_host_bridge[i] = pdev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "physical_node"); + if (rc) + goto err_bridge; } for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { @@ -745,8 +750,14 @@ static __init int cxl_test_init(void) for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); err_bridge: - for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_host_bridge[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); platform_device_unregister(cxl_host_bridge[i]); + } err_populate: depopulate_all_mock_resources(); err_gen_pool_add: @@ -769,8 +780,14 @@ static __exit void cxl_test_exit(void) platform_device_unregister(cxl_switch_uport[i]); for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); - for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_host_bridge[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); platform_device_unregister(cxl_host_bridge[i]); + } depopulate_all_mock_resources(); gen_pool_destroy(cxl_mock_pool); unregister_cxl_mock_ops(&cxl_mock_ops); From 0909b4e5287bcda34f00da878ac1f37a0921d959 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 25 Jan 2022 21:24:04 -0800 Subject: [PATCH 41/48] cxl/core/port: Fix / relax decoder target enumeration If the decoder is not presently active the target_list may not be accurate. Perform a best effort mapping and assume that it will be fixed up when the decoder is enabled. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164317464406.3438644.6609329492458460242.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 2 +- drivers/cxl/core/port.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d8295572bde9..d15a6aec0331 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -15,7 +15,7 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) { - unsigned long flags = 0; + unsigned long flags = CXL_DECODER_F_ENABLE; if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) flags |= CXL_DECODER_F_TYPE2; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index bc18d339738b..c3e83c624700 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1307,8 +1307,11 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); if (!is_endpoint_decoder(dev)) { rc = decoder_populate_targets(cxld, port, target_map); - if (rc) + if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) { + dev_err(&port->dev, + "Failed to populate active decoder targets\n"); return rc; + } } rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id); From 7004cc9d1585fb7fc9ec7e3e92b70b65e13b11fd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 25 Jan 2022 21:24:09 -0800 Subject: [PATCH 42/48] cxl/core/port: Handle invalid decoders In case init_hdm_decoder() finds invalid settings, skip to the next valid decoder. Only fail port enumeration if zero valid decoders are found. This protects the driver init against broken hardware and / or future interleave capabilities. Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/164317464918.3438644.12371149695618136198.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/hdm.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 05b0b292e72d..0e89a7a932d4 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -149,8 +149,8 @@ static int to_interleave_ways(u32 ctrl) } } -static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map, - void __iomem *hdm, int which) +static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, + int *target_map, void __iomem *hdm, int which) { u64 size, base; u32 ctrl; @@ -166,6 +166,11 @@ static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map, if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)) size = 0; + if (base == U64_MAX || size == U64_MAX) { + dev_warn(&port->dev, "decoder%d.%d: Invalid resource range\n", + port->id, cxld->id); + return -ENXIO; + } cxld->decoder_range = (struct range) { .start = base, @@ -179,6 +184,12 @@ static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map, cxld->flags |= CXL_DECODER_F_LOCK; } cxld->interleave_ways = to_interleave_ways(ctrl); + if (!cxld->interleave_ways) { + dev_warn(&port->dev, + "decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n", + port->id, cxld->id, ctrl); + return -ENXIO; + } cxld->interleave_granularity = to_interleave_granularity(ctrl); if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl)) @@ -187,12 +198,14 @@ static void init_hdm_decoder(struct cxl_decoder *cxld, int *target_map, cxld->target_type = CXL_DECODER_ACCELERATOR; if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent))) - return; + return 0; target_list.value = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); for (i = 0; i < cxld->interleave_ways; i++) target_map[i] = target_list.target_id[i]; + + return 0; } /** @@ -203,7 +216,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; struct cxl_port *port = cxlhdm->port; - int i, committed; + int i, committed, failed; u32 ctrl; /* @@ -223,7 +236,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) if (committed != cxlhdm->decoder_count) msleep(20); - for (i = 0; i < cxlhdm->decoder_count; i++) { + for (i = 0, failed = 0; i < cxlhdm->decoder_count; i++) { int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; int rc, target_count = cxlhdm->target_count; struct cxl_decoder *cxld; @@ -238,7 +251,13 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) return PTR_ERR(cxld); } - init_hdm_decoder(cxld, target_map, cxlhdm->regs.hdm_decoder, i); + rc = init_hdm_decoder(port, cxld, target_map, + cxlhdm->regs.hdm_decoder, i); + if (rc) { + put_device(&cxld->dev); + failed++; + continue; + } rc = add_hdm_decoder(port, cxld, target_map); if (rc) { dev_warn(&port->dev, @@ -247,6 +266,11 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) } } + if (failed == cxlhdm->decoder_count) { + dev_err(&port->dev, "No valid decoders found\n"); + return -ENXIO; + } + return 0; } EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, CXL); From 74b0fe80409733055971bbfaf33c80a33fddeeb3 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 1 Feb 2022 15:34:37 +0000 Subject: [PATCH 43/48] cxl/regs: Fix size of CXL Capability Header Register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In CXL 2.0, 8.2.5.1 CXL Capability Header Register: this register is given as 32 bits. 8.2.3 which covers the CXL 2.0 Component registers, including the CXL Capability Header Register states that access restrictions specified in Section 8.2.2 apply. 8.2.2 includes: * A 32 bit register shall be accessed as a 4 Byte quantity. ... If these rules are not followed, the behavior is undefined. Discovered during review of CXL QEMU emulation. Alex Bennée pointed out there was a comment saying that 4 byte registers must be read with a 4 byte read, but 8 byte reads were being emulated. https://lore.kernel.org/qemu-devel/87bkzyd3c7.fsf@linaro.org/ Fixing that, led to this code failing. Whilst a given hardware implementation 'might' work with an 8 byte read, it should not be relied upon. The QEMU emulation v5 will return 0 and log the wrong access width. The code moved, so one fixes tag for where this will directly apply and also a reference to the earlier introduction of the code for backports. Fixes: 0f06157e0135 ("cxl/core: Move register mapping infrastructure") Fixes: 08422378c4ad ("cxl/pci: Add HDM decoder capabilities") Signed-off-by: Jonathan Cameron Cc: Alex Bennée Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/20220201153437.2873-1-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 718b6b0ae4b3..39a129c57d40 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -36,7 +36,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map) { int cap, cap_count; - u64 cap_array; + u32 cap_array; *map = (struct cxl_component_reg_map) { 0 }; @@ -46,7 +46,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, */ base += CXL_CM_OFFSET; - cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET); + cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET); if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { dev_err(dev, From 5c3c067b601bcbcd381214e3a40e666fda5f3d6f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Feb 2022 23:37:28 -0800 Subject: [PATCH 44/48] cxl/core/port: Fix unregister_port() lock assertion The device_lock_assert() in unregister_port() fails to pick the right device leading to splats like the following from: echo "ACPI0017:00" > /sys/bus/platform/drivers/cxl_acpi/unbind WARNING: CPU: 32 PID: 1147 at include/linux/device.h:787 unregister_port+0x49/0x50 [cxl_c [..] RIP: 0010:unregister_port+0x49/0x50 [cxl_core] [..] Call Trace: release_nodes+0x63/0x80 devres_release_all+0x8b/0xc0 __device_release_driver+0x190/0x240 device_driver_detach+0x3e/0xa0 unbind_store+0x113/0x130 Fix it up to assert on the device_lock() for ACPI0017 for root and 1st level ports, and parent ports for all the rest. Fixes: 54cdbf845cf7 ("cxl/port: Add a driver for 'struct cxl_port' objects") Reported-by: Ben Widawsky Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164439224893.2941117.18331456248117887720.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c3e83c624700..9b4bbd51fbaa 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -348,12 +348,28 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL); static void unregister_port(void *_port) { struct cxl_port *port = _port; + struct cxl_port *parent; + struct device *lock_dev; - if (!is_cxl_root(port)) { - device_lock_assert(port->dev.parent); - port->uport = NULL; - } + if (is_cxl_root(port)) + parent = NULL; + else + parent = to_cxl_port(port->dev.parent); + /* + * CXL root port's and the first level of ports are unregistered + * under the platform firmware device lock, all other ports are + * unregistered while holding their parent port lock. + */ + if (!parent) + lock_dev = port->uport; + else if (is_cxl_root(parent)) + lock_dev = parent->uport; + else + lock_dev = &parent->dev; + + device_lock_assert(lock_dev); + port->uport = NULL; device_unregister(&port->dev); } From e6e17cc6ed751072513fe16cb595ac09f6821a43 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Feb 2022 23:37:34 -0800 Subject: [PATCH 45/48] cxl/core: Fix cxl_device_lock() class detection If cxl_device_lock() is used on a non-CXL device the expectation is that the lock class will fall back to CXL_ANON_LOCK. Instead it crashes when trying to determine if the device is a 'decoder'. Specifically when the device has a NULL type pointer. Just check for NULL before de-referencing ->release. Fixes: 3c5b90395525 ("cxl: Prove CXL locking") Reported-by: Ben Widawsky Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164439225406.2941117.3927102269866914339.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 9b4bbd51fbaa..d29eb2abdbc2 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -284,7 +284,7 @@ EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL); bool is_cxl_decoder(struct device *dev) { - return dev->type->release == cxl_decoder_release; + return dev->type && dev->type->release == cxl_decoder_release; } EXPORT_SYMBOL_NS_GPL(is_cxl_decoder, CXL); From 41ae9105f5e23e98a1734be2eeddddf488e42c2e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 10 Feb 2022 17:04:42 -0800 Subject: [PATCH 46/48] cxl/port: Fix endpoint refcount leak An endpoint can be unregistered via two paths. Either its parent port is unregistered, or the memdev that registered the endpoint is removed. The memdev remove path is responsible for synchronizing against the parent ->remove() event and if the memdev remove path wins, manually trigger unregister_port() via devm_release_action(). Until that race is resolved the memdev remove path holds a reference on the endpoint. If the parent port for the endpoint can not be found that is an indication that the endpoint has already been registered. Be sure to drop the reference in all exit paths from delete_endpoint(). Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver") Link: https://lore.kernel.org/r/164454148209.3429624.12905500880311609053.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index d29eb2abdbc2..0fc1441be014 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -850,7 +850,7 @@ static void delete_endpoint(void *data) parent_port = cxl_mem_find_port(cxlmd); if (!parent_port) - return; + goto out; parent = &parent_port->dev; cxl_device_lock(parent); @@ -860,6 +860,7 @@ static void delete_endpoint(void *data) } cxl_device_unlock(parent); put_device(parent); +out: put_device(&endpoint->dev); } From 74be98774dfbc5b8b795db726bd772e735d2edd4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 Feb 2022 16:25:11 -0800 Subject: [PATCH 47/48] cxl/port: Hold port reference until decoder release KASAN + DEBUG_KOBJECT_RELEASE reports a potential use-after-free in cxl_decoder_release() where it goes to reference its parent, a cxl_port, to free its id back to port->decoder_ida. BUG: KASAN: use-after-free in to_cxl_port+0x18/0x90 [cxl_core] Read of size 8 at addr ffff888119270908 by task kworker/35:2/379 CPU: 35 PID: 379 Comm: kworker/35:2 Tainted: G OE 5.17.0-rc2+ #198 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Workqueue: events kobject_delayed_cleanup Call Trace: dump_stack_lvl+0x59/0x73 print_address_description.constprop.0+0x1f/0x150 ? to_cxl_port+0x18/0x90 [cxl_core] kasan_report.cold+0x83/0xdf ? to_cxl_port+0x18/0x90 [cxl_core] to_cxl_port+0x18/0x90 [cxl_core] cxl_decoder_release+0x2a/0x60 [cxl_core] device_release+0x5f/0x100 kobject_cleanup+0x80/0x1c0 The device core only guarantees parent lifetime until all children are unregistered. If a child needs a parent to complete its ->release() callback that child needs to hold a reference to extend the lifetime of the parent. Fixes: 40ba17afdfab ("cxl/acpi: Introduce cxl_decoder objects") Reported-by: Ben Widawsky Tested-by: Ben Widawsky Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164505751190.4175768.13324905271463416712.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0fc1441be014..c1681248f322 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -251,6 +251,7 @@ static void cxl_decoder_release(struct device *dev) ida_free(&port->decoder_ida, cxld->id); kfree(cxld); + put_device(&port->dev); } static const struct device_type cxl_decoder_endpoint_type = { @@ -1202,7 +1203,10 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, if (rc < 0) goto err; + /* need parent to stick around to release the id */ + get_device(&port->dev); cxld->id = rc; + cxld->nr_targets = nr_targets; seqlock_init(&cxld->target_lock); dev = &cxld->dev; From 05e815539f3f161585c13a9ab023341bade2c52f Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 7 Mar 2022 17:41:48 +0800 Subject: [PATCH 48/48] cxl/core/port: Fix NULL but dereferenced coccicheck error Fix the following coccicheck warning: ./drivers/cxl/core/port.c:913:21-24: ERROR: port is NULL but dereferenced. The put_device() is only relevent in the is_cxl_root() case. Fixes: 2703c16c75ae ("cxl/core/port: Add switch port enumeration") Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20220307094158.404882-1-wanjiabing@vivo.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c1681248f322..2ab1ba4499b3 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -911,7 +911,10 @@ static void cxl_detach_ep(void *data) break; port = find_cxl_port(dport_dev); - if (!port || is_cxl_root(port)) { + if (!port) + continue; + + if (is_cxl_root(port)) { put_device(&port->dev); continue; }