linux/arch/powerpc/platforms/powernv/pci-p5ioc2.c

267 lines
7.5 KiB
C
Raw Normal View History

/*
* Support PCI/PCIe on PowerNV platforms
*
* Currently supports only P5IOC2
*
* Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/msi.h>
#include <asm/sections.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/msi_bitmap.h>
#include <asm/ppc-pci.h>
#include <asm/opal.h>
#include <asm/iommu.h>
#include <asm/tce.h>
#include "powernv.h"
#include "pci.h"
/* For now, use a fixed amount of TCE memory for each p5ioc2
* hub, 16M will do
*/
#define P5IOC2_TCE_MEMORY 0x01000000
#ifdef CONFIG_PCI_MSI
static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg)
{
if (WARN_ON(!is_64))
return -ENXIO;
msg->data = hwirq - phb->msi_base;
msg->address_hi = 0x10000000;
msg->address_lo = 0;
return 0;
}
static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb)
{
unsigned int count;
const __be32 *prop = of_get_property(phb->hose->dn,
"ibm,opal-msi-ranges", NULL);
if (!prop)
return;
/* Don't do MSI's on p5ioc2 PCI-X are they are not properly
* verified in HW
*/
if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix"))
return;
phb->msi_base = be32_to_cpup(prop);
count = be32_to_cpup(prop + 1);
if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
phb->hose->global_number);
return;
}
phb->msi_setup = pnv_pci_p5ioc2_msi_setup;
phb->msi32_support = 0;
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
count, phb->msi_base);
}
#else
static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
#endif /* CONFIG_PCI_MSI */
powerpc/iommu: Move tce_xxx callbacks from ppc_md to iommu_table This adds a iommu_table_ops struct and puts pointer to it into the iommu_table struct. This moves tce_build/tce_free/tce_get/tce_flush callbacks from ppc_md to the new struct where they really belong to. This adds the requirement for @it_ops to be initialized before calling iommu_init_table() to make sure that we do not leave any IOMMU table with iommu_table_ops uninitialized. This is not a parameter of iommu_init_table() though as there will be cases when iommu_init_table() will not be called on TCE tables, for example - VFIO. This does s/tce_build/set/, s/tce_free/clear/ and removes "tce_" redundant prefixes. This removes tce_xxx_rm handlers from ppc_md but does not add them to iommu_table_ops as this will be done later if we decide to support TCE hypercalls in real mode. This removes _vm callbacks as only virtual mode is supported by now so this also removes @rm parameter. For pSeries, this always uses tce_buildmulti_pSeriesLP/ tce_buildmulti_pSeriesLP. This changes multi callback to fall back to tce_build_pSeriesLP/tce_free_pSeriesLP if FW_FEATURE_MULTITCE is not present. The reason for this is we still have to support "multitce=off" boot parameter in disable_multitce() and we do not want to walk through all IOMMU tables in the system and replace "multi" callbacks with single ones. For powernv, this defines _ops per PHB type which are P5IOC2/IODA1/IODA2. This makes the callbacks for them public. Later patches will extend callbacks for IODA1/2. No change in behaviour is expected. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2015-06-05 14:35:06 +08:00
static struct iommu_table_ops pnv_p5ioc2_iommu_ops = {
.set = pnv_tce_build,
powerpc/iommu/powernv: Release replaced TCE At the moment writing new TCE value to the IOMMU table fails with EBUSY if there is a valid entry already. However PAPR specification allows the guest to write new TCE value without clearing it first. Another problem this patch is addressing is the use of pool locks for external IOMMU users such as VFIO. The pool locks are to protect DMA page allocator rather than entries and since the host kernel does not control what pages are in use, there is no point in pool locks and exchange()+put_page(oldtce) is sufficient to avoid possible races. This adds an exchange() callback to iommu_table_ops which does the same thing as set() plus it returns replaced TCE and DMA direction so the caller can release the pages afterwards. The exchange() receives a physical address unlike set() which receives linear mapping address; and returns a physical address as the clear() does. This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement for a platform to have exchange() implemented in order to support VFIO. This replaces iommu_tce_build() and iommu_clear_tce() with a single iommu_tce_xchg(). This makes sure that TCE permission bits are not set in TCE passed to IOMMU API as those are to be calculated by platform code from DMA direction. This moves SetPageDirty() to the IOMMU code to make it work for both VFIO ioctl interface in in-kernel TCE acceleration (when it becomes available later). Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> [aw: for the vfio related changes] Acked-by: Alex Williamson <alex.williamson@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2015-06-05 14:35:15 +08:00
#ifdef CONFIG_IOMMU_API
.exchange = pnv_tce_xchg,
#endif
powerpc/iommu: Move tce_xxx callbacks from ppc_md to iommu_table This adds a iommu_table_ops struct and puts pointer to it into the iommu_table struct. This moves tce_build/tce_free/tce_get/tce_flush callbacks from ppc_md to the new struct where they really belong to. This adds the requirement for @it_ops to be initialized before calling iommu_init_table() to make sure that we do not leave any IOMMU table with iommu_table_ops uninitialized. This is not a parameter of iommu_init_table() though as there will be cases when iommu_init_table() will not be called on TCE tables, for example - VFIO. This does s/tce_build/set/, s/tce_free/clear/ and removes "tce_" redundant prefixes. This removes tce_xxx_rm handlers from ppc_md but does not add them to iommu_table_ops as this will be done later if we decide to support TCE hypercalls in real mode. This removes _vm callbacks as only virtual mode is supported by now so this also removes @rm parameter. For pSeries, this always uses tce_buildmulti_pSeriesLP/ tce_buildmulti_pSeriesLP. This changes multi callback to fall back to tce_build_pSeriesLP/tce_free_pSeriesLP if FW_FEATURE_MULTITCE is not present. The reason for this is we still have to support "multitce=off" boot parameter in disable_multitce() and we do not want to walk through all IOMMU tables in the system and replace "multi" callbacks with single ones. For powernv, this defines _ops per PHB type which are P5IOC2/IODA1/IODA2. This makes the callbacks for them public. Later patches will extend callbacks for IODA1/2. No change in behaviour is expected. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2015-06-05 14:35:06 +08:00
.clear = pnv_tce_free,
.get = pnv_tce_get,
};
static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
struct pci_dev *pdev)
{
powerpc/spapr: vfio: Replace iommu_table with iommu_table_group Modern IBM POWERPC systems support multiple (currently two) TCE tables per IOMMU group (a.k.a. PE). This adds a iommu_table_group container for TCE tables. Right now just one table is supported. This defines iommu_table_group struct which stores pointers to iommu_group and iommu_table(s). This replaces iommu_table with iommu_table_group where iommu_table was used to identify a group: - iommu_register_group(); - iommudata of generic iommu_group; This removes @data from iommu_table as it_table_group provides same access to pnv_ioda_pe. For IODA, instead of embedding iommu_table, the new iommu_table_group keeps pointers to those. The iommu_table structs are allocated dynamically. For P5IOC2, both iommu_table_group and iommu_table are embedded into PE struct. As there is no EEH and SRIOV support for P5IOC2, iommu_free_table() should not be called on iommu_table struct pointers so we can keep it embedded in pnv_phb::p5ioc2. For pSeries, this replaces multiple calls of kzalloc_node() with a new iommu_pseries_alloc_group() helper and stores the table group struct pointer into the pci_dn struct. For release, a iommu_table_free_group() helper is added. This moves iommu_table struct allocation from SR-IOV code to the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized. This change is here because those lines had to be changed anyway. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> [aw: for the vfio related changes] Acked-by: Alex Williamson <alex.williamson@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2015-06-05 14:35:08 +08:00
struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0];
if (!tbl->it_map) {
tbl->it_ops = &pnv_p5ioc2_iommu_ops;
iommu_init_table(tbl, phb->hose->node);
iommu_register_group(&phb->p5ioc2.table_group,
pci_domain_nr(phb->hose->bus), phb->opal_id);
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
pnv_pci_link_table_and_group(phb->hose->node, 0,
tbl, &phb->p5ioc2.table_group);
}
powerpc/spapr: vfio: Replace iommu_table with iommu_table_group Modern IBM POWERPC systems support multiple (currently two) TCE tables per IOMMU group (a.k.a. PE). This adds a iommu_table_group container for TCE tables. Right now just one table is supported. This defines iommu_table_group struct which stores pointers to iommu_group and iommu_table(s). This replaces iommu_table with iommu_table_group where iommu_table was used to identify a group: - iommu_register_group(); - iommudata of generic iommu_group; This removes @data from iommu_table as it_table_group provides same access to pnv_ioda_pe. For IODA, instead of embedding iommu_table, the new iommu_table_group keeps pointers to those. The iommu_table structs are allocated dynamically. For P5IOC2, both iommu_table_group and iommu_table are embedded into PE struct. As there is no EEH and SRIOV support for P5IOC2, iommu_free_table() should not be called on iommu_table struct pointers so we can keep it embedded in pnv_phb::p5ioc2. For pSeries, this replaces multiple calls of kzalloc_node() with a new iommu_pseries_alloc_group() helper and stores the table group struct pointer into the pci_dn struct. For release, a iommu_table_free_group() helper is added. This moves iommu_table struct allocation from SR-IOV code to the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized. This change is here because those lines had to be changed anyway. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> [aw: for the vfio related changes] Acked-by: Alex Williamson <alex.williamson@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2015-06-05 14:35:08 +08:00
set_iommu_table_base(&pdev->dev, tbl);
iommu_add_device(&pdev->dev);
}
static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
#endif
};
static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
void *tce_mem, u64 tce_size)
{
struct pnv_phb *phb;
const __be64 *prop64;
u64 phb_id;
int64_t rc;
static int primary = 1;
pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name);
prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
if (!prop64) {
pr_err(" Missing \"ibm,opal-phbid\" property !\n");
return;
}
phb_id = be64_to_cpup(prop64);
pr_devel(" PHB-ID : 0x%016llx\n", phb_id);
pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem));
pr_devel(" TCE SZ : 0x%016llx\n", tce_size);
rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size);
if (rc != OPAL_SUCCESS) {
pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc);
return;
}
phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
phb->hose = pcibios_alloc_controller(np);
if (!phb->hose) {
pr_err(" Failed to allocate PCI controller\n");
return;
}
spin_lock_init(&phb->lock);
phb->hose->first_busno = 0;
phb->hose->last_busno = 0xff;
phb->hose->private_data = phb;
phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops;
phb->hub_id = hub_id;
phb->opal_id = phb_id;
phb->type = PNV_PHB_P5IOC2;
phb->model = PNV_PHB_MODEL_P5IOC2;
phb->regs = of_iomap(np, 0);
if (phb->regs == NULL)
pr_err(" Failed to map registers !\n");
else {
pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100));
pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0));
pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0));
pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0));
pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190));
pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0));
pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0));
pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0));
pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0));
pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0));
pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0));
}
/* Interpret the "ranges" property */
/* This also maps the I/O region and sets isa_io/mem_base */
pci_process_bridge_OF_ranges(phb->hose, np, primary);
primary = 0;
phb->hose->ops = &pnv_pci_ops;
/* Setup MSI support */
pnv_pci_init_p5ioc2_msis(phb);
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup;
pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
tce_mem, tce_size, 0,
IOMMU_PAGE_SHIFT_4K);
powerpc/spapr: vfio: Replace iommu_table with iommu_table_group Modern IBM POWERPC systems support multiple (currently two) TCE tables per IOMMU group (a.k.a. PE). This adds a iommu_table_group container for TCE tables. Right now just one table is supported. This defines iommu_table_group struct which stores pointers to iommu_group and iommu_table(s). This replaces iommu_table with iommu_table_group where iommu_table was used to identify a group: - iommu_register_group(); - iommudata of generic iommu_group; This removes @data from iommu_table as it_table_group provides same access to pnv_ioda_pe. For IODA, instead of embedding iommu_table, the new iommu_table_group keeps pointers to those. The iommu_table structs are allocated dynamically. For P5IOC2, both iommu_table_group and iommu_table are embedded into PE struct. As there is no EEH and SRIOV support for P5IOC2, iommu_free_table() should not be called on iommu_table struct pointers so we can keep it embedded in pnv_phb::p5ioc2. For pSeries, this replaces multiple calls of kzalloc_node() with a new iommu_pseries_alloc_group() helper and stores the table group struct pointer into the pci_dn struct. For release, a iommu_table_free_group() helper is added. This moves iommu_table struct allocation from SR-IOV code to the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized. This change is here because those lines had to be changed anyway. This should cause no behavioural change. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> [aw: for the vfio related changes] Acked-by: Alex Williamson <alex.williamson@redhat.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2015-06-05 14:35:08 +08:00
/*
* We do not allocate iommu_table as we do not support
* hotplug or SRIOV on P5IOC2 and therefore iommu_free_table()
* should not be called for phb->p5ioc2.table_group.tables[0] ever.
*/
phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table;
}
void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
{
struct device_node *phbn;
const __be64 *prop64;
u64 hub_id;
void *tce_mem;
uint64_t tce_per_phb;
int64_t rc;
int phb_count = 0;
pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name);
prop64 = of_get_property(np, "ibm,opal-hubid", NULL);
if (!prop64) {
pr_err(" Missing \"ibm,opal-hubid\" property !\n");
return;
}
hub_id = be64_to_cpup(prop64);
pr_info(" HUB-ID : 0x%016llx\n", hub_id);
/* Count child PHBs and calculate TCE space per PHB */
for_each_child_of_node(np, phbn) {
if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
of_device_is_compatible(phbn, "ibm,p5ioc2-pciex"))
phb_count++;
}
if (phb_count <= 0) {
pr_info(" No PHBs for Hub %s\n", np->full_name);
return;
}
tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count);
pr_info(" Allocating %lld MB of TCE memory per PHB\n",
tce_per_phb >> 20);
/* Currently allocate 16M of TCE memory for every Hub
*
* XXX TODO: Make it chip local if possible
*/
tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY);
pr_debug(" TCE : 0x%016lx..0x%016lx\n",
__pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1);
rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem),
P5IOC2_TCE_MEMORY);
if (rc != OPAL_SUCCESS) {
pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc);
return;
}
/* Initialize PHBs */
for_each_child_of_node(np, phbn) {
if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
pnv_pci_init_p5ioc2_phb(phbn, hub_id,
tce_mem, tce_per_phb);
tce_mem += tce_per_phb;
}
}
}