powerpc/powernv/ioda1: Improve DMA32 segment tracking

In the current implementation, the DMA32 segments required by one specific
PE aren't calculated from the information held in the PE independently.
This conflicts with the PCI hotplug design, which is PE centralized: the
PE's DMA32 segments should be calculated from the information held in
the PE independently.

This introduces an array (@dma32_segmap) for every PHB to track the
DMA32 segment usage. Besides, this moves the logic calculating PE's
consumed DMA32 segments to pnv_pci_ioda1_setup_dma_pe() so that PE's
DMA32 segments are calculated/allocated from the information held in
the PE (DMA32 weight). Also the logic is improved: we try to allocate
as many DMA32 segments as we can. It's acceptable for fewer DMA32
segments than the expected number to be allocated.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
Gavin Shan 2016-05-05 12:04:16 +10:00 committed by Michael Ellerman
parent 801846d1de
commit 2b923ed1bd
2 changed files with 66 additions and 56 deletions

View File

@ -2011,27 +2011,62 @@ static unsigned int pnv_pci_ioda_pe_dma_weight(struct pnv_ioda_pe *pe)
} }
static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe, struct pnv_ioda_pe *pe)
unsigned int base,
unsigned int segs)
{ {
struct page *tce_mem = NULL; struct page *tce_mem = NULL;
struct iommu_table *tbl; struct iommu_table *tbl;
unsigned int tce32_segsz, i; unsigned int weight, total_weight = 0;
unsigned int tce32_segsz, base, segs, avail, i;
int64_t rc; int64_t rc;
void *addr; void *addr;
/* XXX FIXME: Handle 64-bit only DMA devices */ /* XXX FIXME: Handle 64-bit only DMA devices */
/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */ /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
/* XXX FIXME: Allocate multi-level tables on PHB3 */ /* XXX FIXME: Allocate multi-level tables on PHB3 */
weight = pnv_pci_ioda_pe_dma_weight(pe);
if (!weight)
return;
pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
&total_weight);
segs = (weight * phb->ioda.dma32_count) / total_weight;
if (!segs)
segs = 1;
/*
* Allocate contiguous DMA32 segments. We begin with the expected
* number of segments. With one more attempt, the number of DMA32
* segments to be allocated is decreased by one until one segment
* is allocated successfully.
*/
do {
for (base = 0; base <= phb->ioda.dma32_count - segs; base++) {
for (avail = 0, i = base; i < base + segs; i++) {
if (phb->ioda.dma32_segmap[i] ==
IODA_INVALID_PE)
avail++;
}
if (avail == segs)
goto found;
}
} while (--segs);
if (!segs) {
pe_warn(pe, "No available DMA32 segments\n");
return;
}
found:
tbl = pnv_pci_table_alloc(phb->hose->node); tbl = pnv_pci_table_alloc(phb->hose->node);
iommu_register_group(&pe->table_group, phb->hose->global_number, iommu_register_group(&pe->table_group, phb->hose->global_number,
pe->pe_number); pe->pe_number);
pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group); pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
/* Grab a 32-bit TCE table */ /* Grab a 32-bit TCE table */
pe_info(pe, "DMA weight %d (%d), assigned (%d) %d DMA32 segments\n",
weight, total_weight, base, segs);
pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n", pe_info(pe, " Setting up 32-bit TCE table at %08x..%08x\n",
base * PNV_IODA1_DMA32_SEGSIZE, base * PNV_IODA1_DMA32_SEGSIZE,
(base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1); (base + segs) * PNV_IODA1_DMA32_SEGSIZE - 1);
@ -2068,6 +2103,10 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
} }
} }
/* Setup DMA32 segment mapping */
for (i = base; i < base + segs; i++)
phb->ioda.dma32_segmap[i] = pe->pe_number;
/* Setup linux iommu table */ /* Setup linux iommu table */
pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs, pnv_pci_setup_iommu_table(tbl, addr, tce32_segsz * segs,
base * PNV_IODA1_DMA32_SEGSIZE, base * PNV_IODA1_DMA32_SEGSIZE,
@ -2542,73 +2581,34 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
static void pnv_ioda_setup_dma(struct pnv_phb *phb) static void pnv_ioda_setup_dma(struct pnv_phb *phb)
{ {
struct pci_controller *hose = phb->hose; struct pci_controller *hose = phb->hose;
unsigned int weight, total_weight, dma_pe_count;
unsigned int residual, remaining, segs, base;
struct pnv_ioda_pe *pe; struct pnv_ioda_pe *pe;
unsigned int weight;
total_weight = 0;
pci_walk_bus(phb->hose->bus, pnv_pci_ioda_dev_dma_weight,
&total_weight);
dma_pe_count = 0;
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
weight = pnv_pci_ioda_pe_dma_weight(pe);
if (weight > 0)
dma_pe_count++;
}
/* If we have more PE# than segments available, hand out one /* If we have more PE# than segments available, hand out one
* per PE until we run out and let the rest fail. If not, * per PE until we run out and let the rest fail. If not,
* then we assign at least one segment per PE, plus more based * then we assign at least one segment per PE, plus more based
* on the amount of devices under that PE * on the amount of devices under that PE
*/ */
if (dma_pe_count > phb->ioda.tce32_count) pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n",
residual = 0; hose->global_number, phb->ioda.dma32_count);
else
residual = phb->ioda.tce32_count - dma_pe_count;
pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n",
hose->global_number, phb->ioda.tce32_count);
pr_info("PCI: %d PE# for a total weight of %d\n",
dma_pe_count, total_weight);
pnv_pci_ioda_setup_opal_tce_kill(phb); pnv_pci_ioda_setup_opal_tce_kill(phb);
/* Walk our PE list and configure their DMA segments, hand them /* Walk our PE list and configure their DMA segments */
* out one base segment plus any residual segments based on
* weight
*/
remaining = phb->ioda.tce32_count;
base = 0;
list_for_each_entry(pe, &phb->ioda.pe_list, list) { list_for_each_entry(pe, &phb->ioda.pe_list, list) {
weight = pnv_pci_ioda_pe_dma_weight(pe); weight = pnv_pci_ioda_pe_dma_weight(pe);
if (!weight) if (!weight)
continue; continue;
if (!remaining) {
pe_warn(pe, "No DMA32 resources available\n");
continue;
}
segs = 1;
if (residual) {
segs += ((weight * residual) + (total_weight / 2)) /
total_weight;
if (segs > remaining)
segs = remaining;
}
/* /*
* For IODA2 compliant PHB3, we needn't care about the weight. * For IODA2 compliant PHB3, we needn't care about the weight.
* The all available 32-bits DMA space will be assigned to * The all available 32-bits DMA space will be assigned to
* the specific PE. * the specific PE.
*/ */
if (phb->type == PNV_PHB_IODA1) { if (phb->type == PNV_PHB_IODA1) {
pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", pnv_pci_ioda1_setup_dma_pe(phb, pe);
weight, segs);
pnv_pci_ioda1_setup_dma_pe(phb, pe, base, segs);
} else if (phb->type == PNV_PHB_IODA2) { } else if (phb->type == PNV_PHB_IODA2) {
pe_info(pe, "Assign DMA32 space\n"); pe_info(pe, "Assign DMA32 space\n");
segs = 0;
pnv_pci_ioda2_setup_dma_pe(phb, pe); pnv_pci_ioda2_setup_dma_pe(phb, pe);
} else if (phb->type == PNV_PHB_NPU) { } else if (phb->type == PNV_PHB_NPU) {
/* /*
@ -2618,9 +2618,6 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
* as the PHB3 TVT. * as the PHB3 TVT.
*/ */
} }
remaining -= segs;
base += segs;
} }
} }
@ -3327,7 +3324,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
{ {
struct pci_controller *hose; struct pci_controller *hose;
struct pnv_phb *phb; struct pnv_phb *phb;
unsigned long size, m64map_off, m32map_off, pemap_off, iomap_off = 0; unsigned long size, m64map_off, m32map_off, pemap_off;
unsigned long iomap_off = 0, dma32map_off = 0;
const __be64 *prop64; const __be64 *prop64;
const __be32 *prop32; const __be32 *prop32;
int len; int len;
@ -3413,6 +3411,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num; phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
/* Calculate how many 32-bit TCE segments we have */
phb->ioda.dma32_count = phb->ioda.m32_pci_base /
PNV_IODA1_DMA32_SEGSIZE;
/* Allocate aux data & arrays. We don't have IO ports on PHB3 */ /* Allocate aux data & arrays. We don't have IO ports on PHB3 */
size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long)); size = _ALIGN_UP(phb->ioda.total_pe_num / 8, sizeof(unsigned long));
m64map_off = size; m64map_off = size;
@ -3422,6 +3424,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
if (phb->type == PNV_PHB_IODA1) { if (phb->type == PNV_PHB_IODA1) {
iomap_off = size; iomap_off = size;
size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]); size += phb->ioda.total_pe_num * sizeof(phb->ioda.io_segmap[0]);
dma32map_off = size;
size += phb->ioda.dma32_count *
sizeof(phb->ioda.dma32_segmap[0]);
} }
pemap_off = size; pemap_off = size;
size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
@ -3437,6 +3442,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
phb->ioda.io_segmap = aux + iomap_off; phb->ioda.io_segmap = aux + iomap_off;
for (segno = 0; segno < phb->ioda.total_pe_num; segno++) for (segno = 0; segno < phb->ioda.total_pe_num; segno++)
phb->ioda.io_segmap[segno] = IODA_INVALID_PE; phb->ioda.io_segmap[segno] = IODA_INVALID_PE;
phb->ioda.dma32_segmap = aux + dma32map_off;
for (segno = 0; segno < phb->ioda.dma32_count; segno++)
phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE;
} }
phb->ioda.pe_array = aux + pemap_off; phb->ioda.pe_array = aux + pemap_off;
set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc);
@ -3445,7 +3454,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
mutex_init(&phb->ioda.pe_list_mutex); mutex_init(&phb->ioda.pe_list_mutex);
/* Calculate how many 32-bit TCE segments we have */ /* Calculate how many 32-bit TCE segments we have */
phb->ioda.tce32_count = phb->ioda.m32_pci_base / phb->ioda.dma32_count = phb->ioda.m32_pci_base /
PNV_IODA1_DMA32_SEGSIZE; PNV_IODA1_DMA32_SEGSIZE;
#if 0 /* We should really do that ... */ #if 0 /* We should really do that ... */

View File

@ -142,6 +142,10 @@ struct pnv_phb {
unsigned int *m32_segmap; unsigned int *m32_segmap;
unsigned int *io_segmap; unsigned int *io_segmap;
/* DMA32 segment maps - IODA1 only */
unsigned int dma32_count;
unsigned int *dma32_segmap;
/* IRQ chip */ /* IRQ chip */
int irq_chip_init; int irq_chip_init;
struct irq_chip irq_chip; struct irq_chip irq_chip;
@ -158,9 +162,6 @@ struct pnv_phb {
*/ */
unsigned char pe_rmap[0x10000]; unsigned char pe_rmap[0x10000];
/* 32-bit TCE tables allocation */
unsigned long tce32_count;
/* TCE cache invalidate registers (physical and /* TCE cache invalidate registers (physical and
* remapped) * remapped)
*/ */