From 8a695db01dc2b07959628626bc3810c4c6ff2681 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 19 Jan 2016 08:57:48 -0700 Subject: [PATCH 1/4] dmaengine: IOATDMA: fix timer code that continues to restart channels during idle The timer_event() function seems to have a bug where it ends up marking the last entry as non-responding and eventually attempts to restart the channel. This also continuously happen when idle. What needs to happen is for us to make sure there are no descriptors active and then handle that case properly. We should only hit the "cleanup" stage if there are still active descriptors. Signed-off-by: Dave Jiang Signed-off-by: Vinod Koul --- drivers/dma/ioat/dma.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 1d5df2ef148b..21539d5c54c3 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -861,32 +861,42 @@ void ioat_timer_event(unsigned long data) return; } + spin_lock_bh(&ioat_chan->cleanup_lock); + + /* handle the no-actives case */ + if (!ioat_ring_active(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + check_active(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } + /* if we haven't made progress and we have already * acknowledged a pending completion once, then be more * forceful with a restart */ - spin_lock_bh(&ioat_chan->cleanup_lock); if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) __cleanup(ioat_chan, phys_complete); else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) { + u32 chanerr; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_warn(to_dev(ioat_chan), "Restarting channel...\n"); + dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n", + status, chanerr); + dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n", + ioat_ring_active(ioat_chan)); + spin_lock_bh(&ioat_chan->prep_lock); ioat_restart_channel(ioat_chan); spin_unlock_bh(&ioat_chan->prep_lock); spin_unlock_bh(&ioat_chan->cleanup_lock); return; - } else { + } else set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); - mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - if (ioat_ring_active(ioat_chan)) - mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat_chan->prep_lock); - check_active(ioat_chan); - spin_unlock_bh(&ioat_chan->prep_lock); - } + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); spin_unlock_bh(&ioat_chan->cleanup_lock); } From 3efaf2a9a053bce452cec8d4c14f7ad6912215c9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jan 2016 16:27:07 +0200 Subject: [PATCH 2/4] dmaengine: dw: pci: add ID for WildcatPoint PCH WildcatPoint PCH as seen on MacBook 12-inch (Early 2015) has PCI enabled DesignWare DMA controller. Enable it by adding its ID to the corresponding driver. Reported-by: Leif Liddy BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=110901 Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c index 4c30fdd092b3..358f9689a3f5 100644 --- a/drivers/dma/dw/pci.c +++ b/drivers/dma/dw/pci.c @@ -108,6 +108,10 @@ static const struct pci_device_id dw_pci_id_table[] = { /* Haswell */ { PCI_VDEVICE(INTEL, 0x9c60) }, + + /* Broadwell */ + { PCI_VDEVICE(INTEL, 0x9ce0) }, + { } }; MODULE_DEVICE_TABLE(pci, dw_pci_id_table); From 4ac31d18e4125eb2970e69069a15308cbb8e4486 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 28 Jan 2016 11:29:08 +0100 Subject: [PATCH 3/4] dmaengine: edma: fix residue race for cyclic When retrieving the residue value, the SRC/DST fields of the active PaRAM are read to determine the current position of the DMA engine. However, the AM335x Technical Reference Manual states: 11.3.3.6 Parameter Set Updates After the TR is read from the PaRAM (and is in the process of being submitted to the EDMA3TC), the following fields are updated as needed: ... SRC DST This means SRC/DST is incremented even though the DMA transfer may not have started yet or is in progress. Thus if the reader of the residue accesses the DMA buffer too quickly, the CPU is misinformed about the data that has been successfully processed. The CCSTAT.ACTV register is a boolean that is set if any TR is being processed by either the EMDA3CC or EDMA3TC. By polling this register it is possible to ensure that the residue value returned is valid for immediate processing. However, since the DMA engine may be active, polling may never hit a moment where no TR is being processed. To handle this, the SRC/DST is also polled to see if it changes. And as a last resort, a max loop count for the busy waiting exists to avoid an infinite loop. Signed-off-by: John Ogness Acked-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index d92d65549406..e3d7fcb69b4c 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -113,6 +113,9 @@ #define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ #define CHMAP_EXIST BIT(24) +/* CCSTAT register */ +#define EDMA_CCSTAT_ACTV BIT(4) + /* * Max of 20 segments per channel to conserve PaRAM slots * Also note that MAX_NR_SG should be atleast the no.of periods @@ -1680,9 +1683,20 @@ static void edma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&echan->vchan.lock, flags); } +/* + * This limit exists to avoid a possible infinite loop when waiting for proof + * that a particular transfer is completed. This limit can be hit if there + * are large bursts to/from slow devices or the CPU is never able to catch + * the DMA hardware idle. On an AM335x transfering 48 bytes from the UART + * RX-FIFO, as many as 55 loops have been seen. + */ +#define EDMA_MAX_TR_WAIT_LOOPS 1000 + static u32 edma_residue(struct edma_desc *edesc) { bool dst = edesc->direction == DMA_DEV_TO_MEM; + int loop_count = EDMA_MAX_TR_WAIT_LOOPS; + struct edma_chan *echan = edesc->echan; struct edma_pset *pset = edesc->pset; dma_addr_t done, pos; int i; @@ -1691,7 +1705,32 @@ static u32 edma_residue(struct edma_desc *edesc) * We always read the dst/src position from the first RamPar * pset. That's the one which is active now. */ - pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst); + pos = edma_get_position(echan->ecc, echan->slot[0], dst); + + /* + * "pos" may represent a transfer request that is still being + * processed by the EDMACC or EDMATC. We will busy wait until + * any one of the situations occurs: + * 1. the DMA hardware is idle + * 2. a new transfer request is setup + * 3. we hit the loop limit + */ + while (edma_read(echan->ecc, EDMA_CCSTAT) & EDMA_CCSTAT_ACTV) { + /* check if a new transfer request is setup */ + if (edma_get_position(echan->ecc, + echan->slot[0], dst) != pos) { + break; + } + + if (!--loop_count) { + dev_dbg_ratelimited(echan->vchan.chan.device->dev, + "%s: timeout waiting for PaRAM update\n", + __func__); + break; + } + + cpu_relax(); + } /* * Cyclic is simple. Just subtract pset[0].addr from pos. From ee1cdcdae59563535485a5f56ee72c894ab7d7ad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Feb 2016 15:59:42 +0200 Subject: [PATCH 4/4] dmaengine: dw: disable BLOCK IRQs for non-cyclic xfer The commit 2895b2cad6e7 ("dmaengine: dw: fix cyclic transfer callbacks") re-enabled BLOCK interrupts with regard to make cyclic transfers work. However, this change becomes a regression for non-cyclic transfers as interrupt counters under stress test had been grown enormously (approximately per 4-5 bytes in the UART loop back test). Taking into consideration above enable BLOCK interrupts if and only if channel is programmed to perform cyclic transfer. Fixes: 2895b2cad6e7 ("dmaengine: dw: fix cyclic transfer callbacks") Signed-off-by: Andy Shevchenko Acked-by: Mans Rullgard Tested-by: Mans Rullgard Acked-by: Viresh Kumar Cc: Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index e893318560db..5ad0ec1f0e29 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -156,7 +156,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); - channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); dwc->initialized = true; @@ -588,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, spin_unlock_irqrestore(&dwc->lock, flags); } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); } /* ------------------------------------------------------------------------- */ @@ -618,11 +620,8 @@ static void dw_dma_tasklet(unsigned long data) dwc_scan_descriptors(dw, dwc); } - /* - * Re-enable interrupts. - */ + /* Re-enable interrupts */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); - channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -1261,6 +1260,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1269,7 +1269,12 @@ int dw_dma_cyclic_start(struct dma_chan *chan) } spin_lock_irqsave(&dwc->lock, flags); + + /* Enable interrupts to perform cyclic transfer */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); + dwc_dostart(dwc, dwc->cdesc->desc[0]); + spin_unlock_irqrestore(&dwc->lock, flags); return 0;