linux/drivers/mmc/host/sdhci-tegra.c

1701 lines
49 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010 Google, Inc.
*/
#include <linux/delay.h>
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/pinctrl/consumer.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
#include <linux/mmc/slot-gpio.h>
#include <linux/gpio/consumer.h>
#include <linux/ktime.h>
#include "sdhci-pltfm.h"
#include "cqhci.h"
/* Tegra SDHOST controller vendor register definitions */
#define SDHCI_TEGRA_VENDOR_CLOCK_CTRL 0x100
#define SDHCI_CLOCK_CTRL_TAP_MASK 0x00ff0000
#define SDHCI_CLOCK_CTRL_TAP_SHIFT 16
#define SDHCI_CLOCK_CTRL_TRIM_MASK 0x1f000000
#define SDHCI_CLOCK_CTRL_TRIM_SHIFT 24
#define SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE BIT(5)
#define SDHCI_CLOCK_CTRL_PADPIPE_CLKEN_OVERRIDE BIT(3)
#define SDHCI_CLOCK_CTRL_SPI_MODE_CLKEN_OVERRIDE BIT(2)
#define SDHCI_TEGRA_VENDOR_SYS_SW_CTRL 0x104
#define SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE BIT(31)
#define SDHCI_TEGRA_VENDOR_CAP_OVERRIDES 0x10c
#define SDHCI_TEGRA_CAP_OVERRIDES_DQS_TRIM_MASK 0x00003f00
#define SDHCI_TEGRA_CAP_OVERRIDES_DQS_TRIM_SHIFT 8
#define SDHCI_TEGRA_VENDOR_MISC_CTRL 0x120
#define SDHCI_MISC_CTRL_ENABLE_SDR104 0x8
#define SDHCI_MISC_CTRL_ENABLE_SDR50 0x10
#define SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300 0x20
#define SDHCI_MISC_CTRL_ENABLE_DDR50 0x200
#define SDHCI_TEGRA_VENDOR_DLLCAL_CFG 0x1b0
#define SDHCI_TEGRA_DLLCAL_CALIBRATE BIT(31)
#define SDHCI_TEGRA_VENDOR_DLLCAL_STA 0x1bc
#define SDHCI_TEGRA_DLLCAL_STA_ACTIVE BIT(31)
#define SDHCI_VNDR_TUN_CTRL0_0 0x1c0
#define SDHCI_VNDR_TUN_CTRL0_TUN_HW_TAP 0x20000
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
#define SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_MASK 0x03fc0000
#define SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_SHIFT 18
#define SDHCI_VNDR_TUN_CTRL0_MUL_M_MASK 0x00001fc0
#define SDHCI_VNDR_TUN_CTRL0_MUL_M_SHIFT 6
#define SDHCI_VNDR_TUN_CTRL0_TUN_ITER_MASK 0x000e000
#define SDHCI_VNDR_TUN_CTRL0_TUN_ITER_SHIFT 13
#define TRIES_128 2
#define TRIES_256 4
#define SDHCI_VNDR_TUN_CTRL0_TUN_WORD_SEL_MASK 0x7
#define SDHCI_TEGRA_VNDR_TUN_CTRL1_0 0x1c4
#define SDHCI_TEGRA_VNDR_TUN_STATUS0 0x1C8
#define SDHCI_TEGRA_VNDR_TUN_STATUS1 0x1CC
#define SDHCI_TEGRA_VNDR_TUN_STATUS1_TAP_MASK 0xFF
#define SDHCI_TEGRA_VNDR_TUN_STATUS1_END_TAP_SHIFT 0x8
#define TUNING_WORD_BIT_SIZE 32
#define SDHCI_TEGRA_AUTO_CAL_CONFIG 0x1e4
#define SDHCI_AUTO_CAL_START BIT(31)
#define SDHCI_AUTO_CAL_ENABLE BIT(29)
#define SDHCI_AUTO_CAL_PDPU_OFFSET_MASK 0x0000ffff
#define SDHCI_TEGRA_SDMEM_COMP_PADCTRL 0x1e0
#define SDHCI_TEGRA_SDMEM_COMP_PADCTRL_VREF_SEL_MASK 0x0000000f
#define SDHCI_TEGRA_SDMEM_COMP_PADCTRL_VREF_SEL_VAL 0x7
#define SDHCI_TEGRA_SDMEM_COMP_PADCTRL_E_INPUT_E_PWRD BIT(31)
#define SDHCI_COMP_PADCTRL_DRVUPDN_OFFSET_MASK 0x07FFF000
#define SDHCI_TEGRA_AUTO_CAL_STATUS 0x1ec
#define SDHCI_TEGRA_AUTO_CAL_ACTIVE BIT(31)
#define NVQUIRK_FORCE_SDHCI_SPEC_200 BIT(0)
#define NVQUIRK_ENABLE_BLOCK_GAP_DET BIT(1)
#define NVQUIRK_ENABLE_SDHCI_SPEC_300 BIT(2)
#define NVQUIRK_ENABLE_SDR50 BIT(3)
#define NVQUIRK_ENABLE_SDR104 BIT(4)
#define NVQUIRK_ENABLE_DDR50 BIT(5)
#define NVQUIRK_HAS_PADCALIB BIT(6)
#define NVQUIRK_NEEDS_PAD_CONTROL BIT(7)
#define NVQUIRK_DIS_CARD_CLK_CONFIG_TAP BIT(8)
#define NVQUIRK_CQHCI_DCMD_R1B_CMD_TIMING BIT(9)
/* SDMMC CQE Base Address for Tegra Host Ver 4.1 and Higher */
#define SDHCI_TEGRA_CQE_BASE_ADDR 0xF000
struct sdhci_tegra_soc_data {
const struct sdhci_pltfm_data *pdata;
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
u64 dma_mask;
u32 nvquirks;
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
u8 min_tap_delay;
u8 max_tap_delay;
};
/* Magic pull up and pull down pad calibration offsets */
struct sdhci_tegra_autocal_offsets {
u32 pull_up_3v3;
u32 pull_down_3v3;
u32 pull_up_3v3_timeout;
u32 pull_down_3v3_timeout;
u32 pull_up_1v8;
u32 pull_down_1v8;
u32 pull_up_1v8_timeout;
u32 pull_down_1v8_timeout;
u32 pull_up_sdr104;
u32 pull_down_sdr104;
u32 pull_up_hs400;
u32 pull_down_hs400;
};
struct sdhci_tegra {
const struct sdhci_tegra_soc_data *soc_data;
struct gpio_desc *power_gpio;
bool ddr_signaling;
bool pad_calib_required;
bool pad_control_available;
struct reset_control *rst;
struct pinctrl *pinctrl_sdmmc;
struct pinctrl_state *pinctrl_state_3v3;
struct pinctrl_state *pinctrl_state_1v8;
struct pinctrl_state *pinctrl_state_3v3_drv;
struct pinctrl_state *pinctrl_state_1v8_drv;
struct sdhci_tegra_autocal_offsets autocal_offsets;
ktime_t last_calib;
u32 default_tap;
u32 default_trim;
u32 dqs_trim;
bool enable_hwcq;
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
unsigned long curr_clk_rate;
u8 tuned_tap_delay;
};
static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
if (unlikely((soc_data->nvquirks & NVQUIRK_FORCE_SDHCI_SPEC_200) &&
(reg == SDHCI_HOST_VERSION))) {
/* Erratum: Version register is invalid in HW. */
return SDHCI_SPEC_200;
}
return readw(host->ioaddr + reg);
}
static void tegra_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
switch (reg) {
case SDHCI_TRANSFER_MODE:
/*
* Postpone this write, we must do it together with a
* command write that is down below.
*/
pltfm_host->xfer_mode_shadow = val;
return;
case SDHCI_COMMAND:
writel((val << 16) | pltfm_host->xfer_mode_shadow,
host->ioaddr + SDHCI_TRANSFER_MODE);
return;
}
writew(val, host->ioaddr + reg);
}
static void tegra_sdhci_writel(struct sdhci_host *host, u32 val, int reg)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
/* Seems like we're getting spurious timeout and crc errors, so
* disable signalling of them. In case of real errors software
* timers should take care of eventually detecting them.
*/
if (unlikely(reg == SDHCI_SIGNAL_ENABLE))
val &= ~(SDHCI_INT_TIMEOUT|SDHCI_INT_CRC);
writel(val, host->ioaddr + reg);
if (unlikely((soc_data->nvquirks & NVQUIRK_ENABLE_BLOCK_GAP_DET) &&
(reg == SDHCI_INT_ENABLE))) {
/* Erratum: Must enable block gap interrupt detection */
u8 gap_ctrl = readb(host->ioaddr + SDHCI_BLOCK_GAP_CONTROL);
if (val & SDHCI_INT_CARD_INT)
gap_ctrl |= 0x8;
else
gap_ctrl &= ~0x8;
writeb(gap_ctrl, host->ioaddr + SDHCI_BLOCK_GAP_CONTROL);
}
}
static bool tegra_sdhci_configure_card_clk(struct sdhci_host *host, bool enable)
{
bool status;
u32 reg;
reg = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
status = !!(reg & SDHCI_CLOCK_CARD_EN);
if (status == enable)
return status;
if (enable)
reg |= SDHCI_CLOCK_CARD_EN;
else
reg &= ~SDHCI_CLOCK_CARD_EN;
sdhci_writew(host, reg, SDHCI_CLOCK_CONTROL);
return status;
}
static void tegra210_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
{
bool is_tuning_cmd = 0;
bool clk_enabled;
u8 cmd;
if (reg == SDHCI_COMMAND) {
cmd = SDHCI_GET_CMD(val);
is_tuning_cmd = cmd == MMC_SEND_TUNING_BLOCK ||
cmd == MMC_SEND_TUNING_BLOCK_HS200;
}
if (is_tuning_cmd)
clk_enabled = tegra_sdhci_configure_card_clk(host, 0);
writew(val, host->ioaddr + reg);
if (is_tuning_cmd) {
udelay(1);
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
tegra_sdhci_configure_card_clk(host, clk_enabled);
}
}
static unsigned int tegra_sdhci_get_ro(struct sdhci_host *host)
{
/*
* Write-enable shall be assumed if GPIO is missing in a board's
* device-tree because SDHCI's WRITE_PROTECT bit doesn't work on
* Tegra.
*/
return mmc_gpio_get_ro(host->mmc);
}
static bool tegra_sdhci_is_pad_and_regulator_valid(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
int has_1v8, has_3v3;
/*
* The SoCs which have NVQUIRK_NEEDS_PAD_CONTROL require software pad
* voltage configuration in order to perform voltage switching. This
* means that valid pinctrl info is required on SDHCI instances capable
* of performing voltage switching. Whether or not an SDHCI instance is
* capable of voltage switching is determined based on the regulator.
*/
if (!(tegra_host->soc_data->nvquirks & NVQUIRK_NEEDS_PAD_CONTROL))
return true;
if (IS_ERR(host->mmc->supply.vqmmc))
return false;
has_1v8 = regulator_is_supported_voltage(host->mmc->supply.vqmmc,
1700000, 1950000);
has_3v3 = regulator_is_supported_voltage(host->mmc->supply.vqmmc,
2700000, 3600000);
if (has_1v8 == 1 && has_3v3 == 1)
return tegra_host->pad_control_available;
/* Fixed voltage, no pad control required. */
return true;
}
static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
bool card_clk_enabled = false;
u32 reg;
/*
* Touching the tap values is a bit tricky on some SoC generations.
* The quirk enables a workaround for a glitch that sometimes occurs if
* the tap values are changed.
*/
if (soc_data->nvquirks & NVQUIRK_DIS_CARD_CLK_CONFIG_TAP)
card_clk_enabled = tegra_sdhci_configure_card_clk(host, false);
reg = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
reg &= ~SDHCI_CLOCK_CTRL_TAP_MASK;
reg |= tap << SDHCI_CLOCK_CTRL_TAP_SHIFT;
sdhci_writel(host, reg, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
if (soc_data->nvquirks & NVQUIRK_DIS_CARD_CLK_CONFIG_TAP &&
card_clk_enabled) {
udelay(1);
sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
tegra_sdhci_configure_card_clk(host, card_clk_enabled);
}
}
static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc,
struct mmc_ios *ios)
{
struct sdhci_host *host = mmc_priv(mmc);
u32 val;
val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL);
if (ios->enhanced_strobe)
val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE;
else
val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE;
sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL);
}
static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
u32 misc_ctrl, clk_ctrl, pad_ctrl;
sdhci_reset(host, mask);
if (!(mask & SDHCI_RESET_ALL))
return;
tegra_sdhci_set_tap(host, tegra_host->default_tap);
misc_ctrl = sdhci_readl(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
clk_ctrl = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
mmc: tegra: Only advertise UHS modes if IO regulator is present To support UHS modes for Tegra an external regulator must be present to adjust the IO voltage accordingly. Even if the regulator is not present but the host supports the UHS modes and the device supports the UHS modes, then we will attempt to switch to a high-speed mode. Without an external regulator, Tegra will fail to switch to the high-speed mode. It has been found that with some SD cards, that once it has been switch to operate at a high-speed mode, all subsequent commands issues to the card will fail and so it will not be possible to switch back to a non high-speed mode and so the SD card initialisation will fail. The SDHCI core does not require that the host have an external regulator when switching to UHS modes and therefore, the Tegra SDHCI host controller should only advertise the UHS modes as being supported if the regulator for the IO voltage is present. Fortunately, Tegra has a vendor specific register which can be used to control which modes are advertised via the SDHCI_CAPABILITIES register. Hence, if there is no IO voltage regulator available for the Tegra SDHCI host, then don't advertise the UHS modes. Note that if the regulator is not available, we also don't advertise that the SDHCI is compatible with v3.0 of the SDHCI specification because this will read the SDHCI_CAPABILITIES_1 register which will enable other UHS modes. This fixes commit 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") which enables UHS mode without checking if the board can support them. Fixes: 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2016-07-12 21:53:37 +08:00
misc_ctrl &= ~(SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300 |
SDHCI_MISC_CTRL_ENABLE_SDR50 |
SDHCI_MISC_CTRL_ENABLE_DDR50 |
SDHCI_MISC_CTRL_ENABLE_SDR104);
clk_ctrl &= ~(SDHCI_CLOCK_CTRL_TRIM_MASK |
SDHCI_CLOCK_CTRL_SPI_MODE_CLKEN_OVERRIDE);
mmc: tegra: Only advertise UHS modes if IO regulator is present To support UHS modes for Tegra an external regulator must be present to adjust the IO voltage accordingly. Even if the regulator is not present but the host supports the UHS modes and the device supports the UHS modes, then we will attempt to switch to a high-speed mode. Without an external regulator, Tegra will fail to switch to the high-speed mode. It has been found that with some SD cards, that once it has been switch to operate at a high-speed mode, all subsequent commands issues to the card will fail and so it will not be possible to switch back to a non high-speed mode and so the SD card initialisation will fail. The SDHCI core does not require that the host have an external regulator when switching to UHS modes and therefore, the Tegra SDHCI host controller should only advertise the UHS modes as being supported if the regulator for the IO voltage is present. Fortunately, Tegra has a vendor specific register which can be used to control which modes are advertised via the SDHCI_CAPABILITIES register. Hence, if there is no IO voltage regulator available for the Tegra SDHCI host, then don't advertise the UHS modes. Note that if the regulator is not available, we also don't advertise that the SDHCI is compatible with v3.0 of the SDHCI specification because this will read the SDHCI_CAPABILITIES_1 register which will enable other UHS modes. This fixes commit 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") which enables UHS mode without checking if the board can support them. Fixes: 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2016-07-12 21:53:37 +08:00
if (tegra_sdhci_is_pad_and_regulator_valid(host)) {
mmc: tegra: Only advertise UHS modes if IO regulator is present To support UHS modes for Tegra an external regulator must be present to adjust the IO voltage accordingly. Even if the regulator is not present but the host supports the UHS modes and the device supports the UHS modes, then we will attempt to switch to a high-speed mode. Without an external regulator, Tegra will fail to switch to the high-speed mode. It has been found that with some SD cards, that once it has been switch to operate at a high-speed mode, all subsequent commands issues to the card will fail and so it will not be possible to switch back to a non high-speed mode and so the SD card initialisation will fail. The SDHCI core does not require that the host have an external regulator when switching to UHS modes and therefore, the Tegra SDHCI host controller should only advertise the UHS modes as being supported if the regulator for the IO voltage is present. Fortunately, Tegra has a vendor specific register which can be used to control which modes are advertised via the SDHCI_CAPABILITIES register. Hence, if there is no IO voltage regulator available for the Tegra SDHCI host, then don't advertise the UHS modes. Note that if the regulator is not available, we also don't advertise that the SDHCI is compatible with v3.0 of the SDHCI specification because this will read the SDHCI_CAPABILITIES_1 register which will enable other UHS modes. This fixes commit 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") which enables UHS mode without checking if the board can support them. Fixes: 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2016-07-12 21:53:37 +08:00
/* Erratum: Enable SDHCI spec v3.00 support */
if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300)
misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300;
/* Advertise UHS modes as supported by host */
if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50)
misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR50;
if (soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50;
if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104)
misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104;
if (soc_data->nvquirks & SDHCI_MISC_CTRL_ENABLE_SDR50)
clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE;
}
clk_ctrl |= tegra_host->default_trim << SDHCI_CLOCK_CTRL_TRIM_SHIFT;
mmc: tegra: Only advertise UHS modes if IO regulator is present To support UHS modes for Tegra an external regulator must be present to adjust the IO voltage accordingly. Even if the regulator is not present but the host supports the UHS modes and the device supports the UHS modes, then we will attempt to switch to a high-speed mode. Without an external regulator, Tegra will fail to switch to the high-speed mode. It has been found that with some SD cards, that once it has been switch to operate at a high-speed mode, all subsequent commands issues to the card will fail and so it will not be possible to switch back to a non high-speed mode and so the SD card initialisation will fail. The SDHCI core does not require that the host have an external regulator when switching to UHS modes and therefore, the Tegra SDHCI host controller should only advertise the UHS modes as being supported if the regulator for the IO voltage is present. Fortunately, Tegra has a vendor specific register which can be used to control which modes are advertised via the SDHCI_CAPABILITIES register. Hence, if there is no IO voltage regulator available for the Tegra SDHCI host, then don't advertise the UHS modes. Note that if the regulator is not available, we also don't advertise that the SDHCI is compatible with v3.0 of the SDHCI specification because this will read the SDHCI_CAPABILITIES_1 register which will enable other UHS modes. This fixes commit 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") which enables UHS mode without checking if the board can support them. Fixes: 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2016-07-12 21:53:37 +08:00
sdhci_writel(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
sdhci_writel(host, clk_ctrl, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
if (soc_data->nvquirks & NVQUIRK_HAS_PADCALIB) {
pad_ctrl = sdhci_readl(host, SDHCI_TEGRA_SDMEM_COMP_PADCTRL);
pad_ctrl &= ~SDHCI_TEGRA_SDMEM_COMP_PADCTRL_VREF_SEL_MASK;
pad_ctrl |= SDHCI_TEGRA_SDMEM_COMP_PADCTRL_VREF_SEL_VAL;
sdhci_writel(host, pad_ctrl, SDHCI_TEGRA_SDMEM_COMP_PADCTRL);
tegra_host->pad_calib_required = true;
}
tegra_host->ddr_signaling = false;
}
static void tegra_sdhci_configure_cal_pad(struct sdhci_host *host, bool enable)
{
u32 val;
/*
* Enable or disable the additional I/O pad used by the drive strength
* calibration process.
*/
val = sdhci_readl(host, SDHCI_TEGRA_SDMEM_COMP_PADCTRL);
if (enable)
val |= SDHCI_TEGRA_SDMEM_COMP_PADCTRL_E_INPUT_E_PWRD;
else
val &= ~SDHCI_TEGRA_SDMEM_COMP_PADCTRL_E_INPUT_E_PWRD;
sdhci_writel(host, val, SDHCI_TEGRA_SDMEM_COMP_PADCTRL);
if (enable)
usleep_range(1, 2);
}
static void tegra_sdhci_set_pad_autocal_offset(struct sdhci_host *host,
u16 pdpu)
{
u32 reg;
reg = sdhci_readl(host, SDHCI_TEGRA_AUTO_CAL_CONFIG);
reg &= ~SDHCI_AUTO_CAL_PDPU_OFFSET_MASK;
reg |= pdpu;
sdhci_writel(host, reg, SDHCI_TEGRA_AUTO_CAL_CONFIG);
}
static int tegra_sdhci_set_padctrl(struct sdhci_host *host, int voltage,
bool state_drvupdn)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
struct sdhci_tegra_autocal_offsets *offsets =
&tegra_host->autocal_offsets;
struct pinctrl_state *pinctrl_drvupdn = NULL;
int ret = 0;
u8 drvup = 0, drvdn = 0;
u32 reg;
if (!state_drvupdn) {
/* PADS Drive Strength */
if (voltage == MMC_SIGNAL_VOLTAGE_180) {
if (tegra_host->pinctrl_state_1v8_drv) {
pinctrl_drvupdn =
tegra_host->pinctrl_state_1v8_drv;
} else {
drvup = offsets->pull_up_1v8_timeout;
drvdn = offsets->pull_down_1v8_timeout;
}
} else {
if (tegra_host->pinctrl_state_3v3_drv) {
pinctrl_drvupdn =
tegra_host->pinctrl_state_3v3_drv;
} else {
drvup = offsets->pull_up_3v3_timeout;
drvdn = offsets->pull_down_3v3_timeout;
}
}
if (pinctrl_drvupdn != NULL) {
ret = pinctrl_select_state(tegra_host->pinctrl_sdmmc,
pinctrl_drvupdn);
if (ret < 0)
dev_err(mmc_dev(host->mmc),
"failed pads drvupdn, ret: %d\n", ret);
} else if ((drvup) || (drvdn)) {
reg = sdhci_readl(host,
SDHCI_TEGRA_SDMEM_COMP_PADCTRL);
reg &= ~SDHCI_COMP_PADCTRL_DRVUPDN_OFFSET_MASK;
reg |= (drvup << 20) | (drvdn << 12);
sdhci_writel(host, reg,
SDHCI_TEGRA_SDMEM_COMP_PADCTRL);
}
} else {
/* Dual Voltage PADS Voltage selection */
if (!tegra_host->pad_control_available)
return 0;
if (voltage == MMC_SIGNAL_VOLTAGE_180) {
ret = pinctrl_select_state(tegra_host->pinctrl_sdmmc,
tegra_host->pinctrl_state_1v8);
if (ret < 0)
dev_err(mmc_dev(host->mmc),
"setting 1.8V failed, ret: %d\n", ret);
} else {
ret = pinctrl_select_state(tegra_host->pinctrl_sdmmc,
tegra_host->pinctrl_state_3v3);
if (ret < 0)
dev_err(mmc_dev(host->mmc),
"setting 3.3V failed, ret: %d\n", ret);
}
}
return ret;
}
static void tegra_sdhci_pad_autocalib(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
struct sdhci_tegra_autocal_offsets offsets =
tegra_host->autocal_offsets;
struct mmc_ios *ios = &host->mmc->ios;
bool card_clk_enabled;
u16 pdpu;
u32 reg;
int ret;
switch (ios->timing) {
case MMC_TIMING_UHS_SDR104:
pdpu = offsets.pull_down_sdr104 << 8 | offsets.pull_up_sdr104;
break;
case MMC_TIMING_MMC_HS400:
pdpu = offsets.pull_down_hs400 << 8 | offsets.pull_up_hs400;
break;
default:
if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180)
pdpu = offsets.pull_down_1v8 << 8 | offsets.pull_up_1v8;
else
pdpu = offsets.pull_down_3v3 << 8 | offsets.pull_up_3v3;
}
/* Set initial offset before auto-calibration */
tegra_sdhci_set_pad_autocal_offset(host, pdpu);
card_clk_enabled = tegra_sdhci_configure_card_clk(host, false);
tegra_sdhci_configure_cal_pad(host, true);
reg = sdhci_readl(host, SDHCI_TEGRA_AUTO_CAL_CONFIG);
reg |= SDHCI_AUTO_CAL_ENABLE | SDHCI_AUTO_CAL_START;
sdhci_writel(host, reg, SDHCI_TEGRA_AUTO_CAL_CONFIG);
usleep_range(1, 2);
/* 10 ms timeout */
ret = readl_poll_timeout(host->ioaddr + SDHCI_TEGRA_AUTO_CAL_STATUS,
reg, !(reg & SDHCI_TEGRA_AUTO_CAL_ACTIVE),
1000, 10000);
tegra_sdhci_configure_cal_pad(host, false);
tegra_sdhci_configure_card_clk(host, card_clk_enabled);
if (ret) {
dev_err(mmc_dev(host->mmc), "Pad autocal timed out\n");
/* Disable automatic cal and use fixed Drive Strengths */
reg = sdhci_readl(host, SDHCI_TEGRA_AUTO_CAL_CONFIG);
reg &= ~SDHCI_AUTO_CAL_ENABLE;
sdhci_writel(host, reg, SDHCI_TEGRA_AUTO_CAL_CONFIG);
ret = tegra_sdhci_set_padctrl(host, ios->signal_voltage, false);
if (ret < 0)
dev_err(mmc_dev(host->mmc),
"Setting drive strengths failed: %d\n", ret);
}
}
static void tegra_sdhci_parse_pad_autocal_dt(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
struct sdhci_tegra_autocal_offsets *autocal =
&tegra_host->autocal_offsets;
int err;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-up-offset-3v3",
&autocal->pull_up_3v3);
if (err)
autocal->pull_up_3v3 = 0;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-down-offset-3v3",
&autocal->pull_down_3v3);
if (err)
autocal->pull_down_3v3 = 0;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-up-offset-1v8",
&autocal->pull_up_1v8);
if (err)
autocal->pull_up_1v8 = 0;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-down-offset-1v8",
&autocal->pull_down_1v8);
if (err)
autocal->pull_down_1v8 = 0;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-up-offset-3v3-timeout",
&autocal->pull_up_3v3_timeout);
if (err) {
if (!IS_ERR(tegra_host->pinctrl_state_3v3) &&
(tegra_host->pinctrl_state_3v3_drv == NULL))
pr_warn("%s: Missing autocal timeout 3v3-pad drvs\n",
mmc_hostname(host->mmc));
autocal->pull_up_3v3_timeout = 0;
}
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-down-offset-3v3-timeout",
&autocal->pull_down_3v3_timeout);
if (err) {
if (!IS_ERR(tegra_host->pinctrl_state_3v3) &&
(tegra_host->pinctrl_state_3v3_drv == NULL))
pr_warn("%s: Missing autocal timeout 3v3-pad drvs\n",
mmc_hostname(host->mmc));
autocal->pull_down_3v3_timeout = 0;
}
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-up-offset-1v8-timeout",
&autocal->pull_up_1v8_timeout);
if (err) {
if (!IS_ERR(tegra_host->pinctrl_state_1v8) &&
(tegra_host->pinctrl_state_1v8_drv == NULL))
pr_warn("%s: Missing autocal timeout 1v8-pad drvs\n",
mmc_hostname(host->mmc));
autocal->pull_up_1v8_timeout = 0;
}
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-down-offset-1v8-timeout",
&autocal->pull_down_1v8_timeout);
if (err) {
if (!IS_ERR(tegra_host->pinctrl_state_1v8) &&
(tegra_host->pinctrl_state_1v8_drv == NULL))
pr_warn("%s: Missing autocal timeout 1v8-pad drvs\n",
mmc_hostname(host->mmc));
autocal->pull_down_1v8_timeout = 0;
}
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-up-offset-sdr104",
&autocal->pull_up_sdr104);
if (err)
autocal->pull_up_sdr104 = autocal->pull_up_1v8;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-down-offset-sdr104",
&autocal->pull_down_sdr104);
if (err)
autocal->pull_down_sdr104 = autocal->pull_down_1v8;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-up-offset-hs400",
&autocal->pull_up_hs400);
if (err)
autocal->pull_up_hs400 = autocal->pull_up_1v8;
err = device_property_read_u32(host->mmc->parent,
"nvidia,pad-autocal-pull-down-offset-hs400",
&autocal->pull_down_hs400);
if (err)
autocal->pull_down_hs400 = autocal->pull_down_1v8;
}
static void tegra_sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
{
struct sdhci_host *host = mmc_priv(mmc);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
ktime_t since_calib = ktime_sub(ktime_get(), tegra_host->last_calib);
/* 100 ms calibration interval is specified in the TRM */
if (ktime_to_ms(since_calib) > 100) {
tegra_sdhci_pad_autocalib(host);
tegra_host->last_calib = ktime_get();
}
sdhci_request(mmc, mrq);
}
static void tegra_sdhci_parse_tap_and_trim(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
int err;
err = device_property_read_u32(host->mmc->parent, "nvidia,default-tap",
&tegra_host->default_tap);
if (err)
tegra_host->default_tap = 0;
err = device_property_read_u32(host->mmc->parent, "nvidia,default-trim",
&tegra_host->default_trim);
if (err)
tegra_host->default_trim = 0;
err = device_property_read_u32(host->mmc->parent, "nvidia,dqs-trim",
&tegra_host->dqs_trim);
if (err)
tegra_host->dqs_trim = 0x11;
}
static void tegra_sdhci_parse_dt(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
if (device_property_read_bool(host->mmc->parent, "supports-cqe"))
tegra_host->enable_hwcq = true;
else
tegra_host->enable_hwcq = false;
tegra_sdhci_parse_pad_autocal_dt(host);
tegra_sdhci_parse_tap_and_trim(host);
}
static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
unsigned long host_clk;
if (!clock)
return sdhci_set_clock(host, clock);
/*
* In DDR50/52 modes the Tegra SDHCI controllers require the SDHCI
* divider to be configured to divided the host clock by two. The SDHCI
* clock divider is calculated as part of sdhci_set_clock() by
* sdhci_calc_clk(). The divider is calculated from host->max_clk and
* the requested clock rate.
*
* By setting the host->max_clk to clock * 2 the divider calculation
* will always result in the correct value for DDR50/52 modes,
* regardless of clock rate rounding, which may happen if the value
* from clk_get_rate() is used.
*/
host_clk = tegra_host->ddr_signaling ? clock * 2 : clock;
clk_set_rate(pltfm_host->clk, host_clk);
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
tegra_host->curr_clk_rate = host_clk;
if (tegra_host->ddr_signaling)
host->max_clk = host_clk;
else
host->max_clk = clk_get_rate(pltfm_host->clk);
sdhci_set_clock(host, clock);
if (tegra_host->pad_calib_required) {
tegra_sdhci_pad_autocalib(host);
tegra_host->pad_calib_required = false;
}
}
static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
return clk_round_rate(pltfm_host->clk, UINT_MAX);
}
static void tegra_sdhci_set_dqs_trim(struct sdhci_host *host, u8 trim)
{
u32 val;
val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CAP_OVERRIDES);
val &= ~SDHCI_TEGRA_CAP_OVERRIDES_DQS_TRIM_MASK;
val |= trim << SDHCI_TEGRA_CAP_OVERRIDES_DQS_TRIM_SHIFT;
sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_CAP_OVERRIDES);
}
static void tegra_sdhci_hs400_dll_cal(struct sdhci_host *host)
{
u32 reg;
int err;
reg = sdhci_readl(host, SDHCI_TEGRA_VENDOR_DLLCAL_CFG);
reg |= SDHCI_TEGRA_DLLCAL_CALIBRATE;
sdhci_writel(host, reg, SDHCI_TEGRA_VENDOR_DLLCAL_CFG);
/* 1 ms sleep, 5 ms timeout */
err = readl_poll_timeout(host->ioaddr + SDHCI_TEGRA_VENDOR_DLLCAL_STA,
reg, !(reg & SDHCI_TEGRA_DLLCAL_STA_ACTIVE),
1000, 5000);
if (err)
dev_err(mmc_dev(host->mmc),
"HS400 delay line calibration timed out\n");
}
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
static void tegra_sdhci_tap_correction(struct sdhci_host *host, u8 thd_up,
u8 thd_low, u8 fixed_tap)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
u32 val, tun_status;
u8 word, bit, edge1, tap, window;
bool tap_result;
bool start_fail = false;
bool start_pass = false;
bool end_pass = false;
bool first_fail = false;
bool first_pass = false;
u8 start_pass_tap = 0;
u8 end_pass_tap = 0;
u8 first_fail_tap = 0;
u8 first_pass_tap = 0;
u8 total_tuning_words = host->tuning_loop_count / TUNING_WORD_BIT_SIZE;
/*
* Read auto-tuned results and extract good valid passing window by
* filtering out un-wanted bubble/partial/merged windows.
*/
for (word = 0; word < total_tuning_words; word++) {
val = sdhci_readl(host, SDHCI_VNDR_TUN_CTRL0_0);
val &= ~SDHCI_VNDR_TUN_CTRL0_TUN_WORD_SEL_MASK;
val |= word;
sdhci_writel(host, val, SDHCI_VNDR_TUN_CTRL0_0);
tun_status = sdhci_readl(host, SDHCI_TEGRA_VNDR_TUN_STATUS0);
bit = 0;
while (bit < TUNING_WORD_BIT_SIZE) {
tap = word * TUNING_WORD_BIT_SIZE + bit;
tap_result = tun_status & (1 << bit);
if (!tap_result && !start_fail) {
start_fail = true;
if (!first_fail) {
first_fail_tap = tap;
first_fail = true;
}
} else if (tap_result && start_fail && !start_pass) {
start_pass_tap = tap;
start_pass = true;
if (!first_pass) {
first_pass_tap = tap;
first_pass = true;
}
} else if (!tap_result && start_fail && start_pass &&
!end_pass) {
end_pass_tap = tap - 1;
end_pass = true;
} else if (tap_result && start_pass && start_fail &&
end_pass) {
window = end_pass_tap - start_pass_tap;
/* discard merged window and bubble window */
if (window >= thd_up || window < thd_low) {
start_pass_tap = tap;
end_pass = false;
} else {
/* set tap at middle of valid window */
tap = start_pass_tap + window / 2;
tegra_host->tuned_tap_delay = tap;
return;
}
}
bit++;
}
}
if (!first_fail) {
WARN(1, "no edge detected, continue with hw tuned delay.\n");
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
} else if (first_pass) {
/* set tap location at fixed tap relative to the first edge */
edge1 = first_fail_tap + (first_pass_tap - first_fail_tap) / 2;
if (edge1 - 1 > fixed_tap)
tegra_host->tuned_tap_delay = edge1 - fixed_tap;
else
tegra_host->tuned_tap_delay = edge1 + fixed_tap;
}
}
static void tegra_sdhci_post_tuning(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
u32 avg_tap_dly, val, min_tap_dly, max_tap_dly;
u8 fixed_tap, start_tap, end_tap, window_width;
u8 thdupper, thdlower;
u8 num_iter;
u32 clk_rate_mhz, period_ps, bestcase, worstcase;
/* retain HW tuned tap to use incase if no correction is needed */
val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
tegra_host->tuned_tap_delay = (val & SDHCI_CLOCK_CTRL_TAP_MASK) >>
SDHCI_CLOCK_CTRL_TAP_SHIFT;
if (soc_data->min_tap_delay && soc_data->max_tap_delay) {
min_tap_dly = soc_data->min_tap_delay;
max_tap_dly = soc_data->max_tap_delay;
clk_rate_mhz = tegra_host->curr_clk_rate / USEC_PER_SEC;
period_ps = USEC_PER_SEC / clk_rate_mhz;
bestcase = period_ps / min_tap_dly;
worstcase = period_ps / max_tap_dly;
/*
* Upper and Lower bound thresholds used to detect merged and
* bubble windows
*/
thdupper = (2 * worstcase + bestcase) / 2;
thdlower = worstcase / 4;
/*
* fixed tap is used when HW tuning result contains single edge
* and tap is set at fixed tap delay relative to the first edge
*/
avg_tap_dly = (period_ps * 2) / (min_tap_dly + max_tap_dly);
fixed_tap = avg_tap_dly / 2;
val = sdhci_readl(host, SDHCI_TEGRA_VNDR_TUN_STATUS1);
start_tap = val & SDHCI_TEGRA_VNDR_TUN_STATUS1_TAP_MASK;
end_tap = (val >> SDHCI_TEGRA_VNDR_TUN_STATUS1_END_TAP_SHIFT) &
SDHCI_TEGRA_VNDR_TUN_STATUS1_TAP_MASK;
window_width = end_tap - start_tap;
num_iter = host->tuning_loop_count;
/*
* partial window includes edges of the tuning range.
* merged window includes more taps so window width is higher
* than upper threshold.
*/
if (start_tap == 0 || (end_tap == (num_iter - 1)) ||
(end_tap == num_iter - 2) || window_width >= thdupper) {
pr_debug("%s: Apply tuning correction\n",
mmc_hostname(host->mmc));
tegra_sdhci_tap_correction(host, thdupper, thdlower,
fixed_tap);
}
}
tegra_sdhci_set_tap(host, tegra_host->tuned_tap_delay);
}
static int tegra_sdhci_execute_hw_tuning(struct mmc_host *mmc, u32 opcode)
{
struct sdhci_host *host = mmc_priv(mmc);
int err;
err = sdhci_execute_tuning(mmc, opcode);
if (!err && !host->tuning_err)
tegra_sdhci_post_tuning(host);
return err;
}
static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host,
unsigned timing)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
bool set_default_tap = false;
bool set_dqs_trim = false;
bool do_hs400_dll_cal = false;
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
u8 iter = TRIES_256;
u32 val;
tegra_host->ddr_signaling = false;
switch (timing) {
case MMC_TIMING_UHS_SDR50:
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
break;
case MMC_TIMING_UHS_SDR104:
case MMC_TIMING_MMC_HS200:
/* Don't set default tap on tunable modes. */
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
iter = TRIES_128;
break;
case MMC_TIMING_MMC_HS400:
set_dqs_trim = true;
do_hs400_dll_cal = true;
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
iter = TRIES_128;
break;
case MMC_TIMING_MMC_DDR52:
case MMC_TIMING_UHS_DDR50:
tegra_host->ddr_signaling = true;
set_default_tap = true;
break;
default:
set_default_tap = true;
break;
}
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
val = sdhci_readl(host, SDHCI_VNDR_TUN_CTRL0_0);
val &= ~(SDHCI_VNDR_TUN_CTRL0_TUN_ITER_MASK |
SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_MASK |
SDHCI_VNDR_TUN_CTRL0_MUL_M_MASK);
val |= (iter << SDHCI_VNDR_TUN_CTRL0_TUN_ITER_SHIFT |
0 << SDHCI_VNDR_TUN_CTRL0_START_TAP_VAL_SHIFT |
1 << SDHCI_VNDR_TUN_CTRL0_MUL_M_SHIFT);
sdhci_writel(host, val, SDHCI_VNDR_TUN_CTRL0_0);
sdhci_writel(host, 0, SDHCI_TEGRA_VNDR_TUN_CTRL1_0);
host->tuning_loop_count = (iter == TRIES_128) ? 128 : 256;
sdhci_set_uhs_signaling(host, timing);
tegra_sdhci_pad_autocalib(host);
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
if (tegra_host->tuned_tap_delay && !set_default_tap)
tegra_sdhci_set_tap(host, tegra_host->tuned_tap_delay);
else
tegra_sdhci_set_tap(host, tegra_host->default_tap);
if (set_dqs_trim)
tegra_sdhci_set_dqs_trim(host, tegra_host->dqs_trim);
if (do_hs400_dll_cal)
tegra_sdhci_hs400_dll_cal(host);
}
static int tegra_sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
{
unsigned int min, max;
/*
* Start search for minimum tap value at 10, as smaller values are
* may wrongly be reported as working but fail at higher speeds,
* according to the TRM.
*/
min = 10;
while (min < 255) {
tegra_sdhci_set_tap(host, min);
if (!mmc_send_tuning(host->mmc, opcode, NULL))
break;
min++;
}
/* Find the maximum tap value that still passes. */
max = min + 1;
while (max < 255) {
tegra_sdhci_set_tap(host, max);
if (mmc_send_tuning(host->mmc, opcode, NULL)) {
max--;
break;
}
max++;
}
/* The TRM states the ideal tap value is at 75% in the passing range. */
tegra_sdhci_set_tap(host, min + ((max - min) * 3 / 4));
return mmc_send_tuning(host->mmc, opcode, NULL);
}
static int sdhci_tegra_start_signal_voltage_switch(struct mmc_host *mmc,
struct mmc_ios *ios)
{
struct sdhci_host *host = mmc_priv(mmc);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
int ret = 0;
if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) {
ret = tegra_sdhci_set_padctrl(host, ios->signal_voltage, true);
if (ret < 0)
return ret;
ret = sdhci_start_signal_voltage_switch(mmc, ios);
} else if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) {
ret = sdhci_start_signal_voltage_switch(mmc, ios);
if (ret < 0)
return ret;
ret = tegra_sdhci_set_padctrl(host, ios->signal_voltage, true);
}
if (tegra_host->pad_calib_required)
tegra_sdhci_pad_autocalib(host);
return ret;
}
static int tegra_sdhci_init_pinctrl_info(struct device *dev,
struct sdhci_tegra *tegra_host)
{
tegra_host->pinctrl_sdmmc = devm_pinctrl_get(dev);
if (IS_ERR(tegra_host->pinctrl_sdmmc)) {
dev_dbg(dev, "No pinctrl info, err: %ld\n",
PTR_ERR(tegra_host->pinctrl_sdmmc));
return -1;
}
tegra_host->pinctrl_state_1v8_drv = pinctrl_lookup_state(
tegra_host->pinctrl_sdmmc, "sdmmc-1v8-drv");
if (IS_ERR(tegra_host->pinctrl_state_1v8_drv)) {
if (PTR_ERR(tegra_host->pinctrl_state_1v8_drv) == -ENODEV)
tegra_host->pinctrl_state_1v8_drv = NULL;
}
tegra_host->pinctrl_state_3v3_drv = pinctrl_lookup_state(
tegra_host->pinctrl_sdmmc, "sdmmc-3v3-drv");
if (IS_ERR(tegra_host->pinctrl_state_3v3_drv)) {
if (PTR_ERR(tegra_host->pinctrl_state_3v3_drv) == -ENODEV)
tegra_host->pinctrl_state_3v3_drv = NULL;
}
tegra_host->pinctrl_state_3v3 =
pinctrl_lookup_state(tegra_host->pinctrl_sdmmc, "sdmmc-3v3");
if (IS_ERR(tegra_host->pinctrl_state_3v3)) {
dev_warn(dev, "Missing 3.3V pad state, err: %ld\n",
PTR_ERR(tegra_host->pinctrl_state_3v3));
return -1;
}
tegra_host->pinctrl_state_1v8 =
pinctrl_lookup_state(tegra_host->pinctrl_sdmmc, "sdmmc-1v8");
if (IS_ERR(tegra_host->pinctrl_state_1v8)) {
dev_warn(dev, "Missing 1.8V pad state, err: %ld\n",
PTR_ERR(tegra_host->pinctrl_state_1v8));
return -1;
}
tegra_host->pad_control_available = true;
return 0;
}
static void tegra_sdhci_voltage_switch(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
if (soc_data->nvquirks & NVQUIRK_HAS_PADCALIB)
tegra_host->pad_calib_required = true;
}
static void tegra_cqhci_writel(struct cqhci_host *cq_host, u32 val, int reg)
{
struct mmc_host *mmc = cq_host->mmc;
u8 ctrl;
ktime_t timeout;
bool timed_out;
/*
* During CQE resume/unhalt, CQHCI driver unhalts CQE prior to
* cqhci_host_ops enable where SDHCI DMA and BLOCK_SIZE registers need
* to be re-configured.
* Tegra CQHCI/SDHCI prevents write access to block size register when
* CQE is unhalted. So handling CQE resume sequence here to configure
* SDHCI block registers prior to exiting CQE halt state.
*/
if (reg == CQHCI_CTL && !(val & CQHCI_HALT) &&
cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) {
sdhci_cqe_enable(mmc);
writel(val, cq_host->mmio + reg);
timeout = ktime_add_us(ktime_get(), 50);
while (1) {
timed_out = ktime_compare(ktime_get(), timeout) > 0;
ctrl = cqhci_readl(cq_host, CQHCI_CTL);
if (!(ctrl & CQHCI_HALT) || timed_out)
break;
}
/*
* CQE usually resumes very quick, but incase if Tegra CQE
* doesn't resume retry unhalt.
*/
if (timed_out)
writel(val, cq_host->mmio + reg);
} else {
writel(val, cq_host->mmio + reg);
}
}
static void sdhci_tegra_update_dcmd_desc(struct mmc_host *mmc,
struct mmc_request *mrq, u64 *data)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(mmc_priv(mmc));
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
if (soc_data->nvquirks & NVQUIRK_CQHCI_DCMD_R1B_CMD_TIMING &&
mrq->cmd->flags & MMC_RSP_R1B)
*data |= CQHCI_CMD_TIMING(1);
}
static void sdhci_tegra_cqe_enable(struct mmc_host *mmc)
{
struct cqhci_host *cq_host = mmc->cqe_private;
u32 val;
/*
* Tegra CQHCI/SDMMC design prevents write access to sdhci block size
* register when CQE is enabled and unhalted.
* CQHCI driver enables CQE prior to activation, so disable CQE before
* programming block size in sdhci controller and enable it back.
*/
if (!cq_host->activated) {
val = cqhci_readl(cq_host, CQHCI_CFG);
if (val & CQHCI_ENABLE)
cqhci_writel(cq_host, (val & ~CQHCI_ENABLE),
CQHCI_CFG);
sdhci_cqe_enable(mmc);
if (val & CQHCI_ENABLE)
cqhci_writel(cq_host, val, CQHCI_CFG);
}
/*
* CMD CRC errors are seen sometimes with some eMMC devices when status
* command is sent during transfer of last data block which is the
* default case as send status command block counter (CBC) is 1.
* Recommended fix to set CBC to 0 allowing send status command only
* when data lines are idle.
*/
val = cqhci_readl(cq_host, CQHCI_SSC1);
val &= ~CQHCI_SSC1_CBC_MASK;
cqhci_writel(cq_host, val, CQHCI_SSC1);
}
static void sdhci_tegra_dumpregs(struct mmc_host *mmc)
{
sdhci_dumpregs(mmc_priv(mmc));
}
static u32 sdhci_tegra_cqhci_irq(struct sdhci_host *host, u32 intmask)
{
int cmd_error = 0;
int data_error = 0;
if (!sdhci_cqe_irq(host, intmask, &cmd_error, &data_error))
return intmask;
cqhci_irq(host->mmc, intmask, cmd_error, data_error);
return 0;
}
static const struct cqhci_host_ops sdhci_tegra_cqhci_ops = {
.write_l = tegra_cqhci_writel,
.enable = sdhci_tegra_cqe_enable,
.disable = sdhci_cqe_disable,
.dumpregs = sdhci_tegra_dumpregs,
.update_dcmd_desc = sdhci_tegra_update_dcmd_desc,
};
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
static int tegra_sdhci_set_dma_mask(struct sdhci_host *host)
{
struct sdhci_pltfm_host *platform = sdhci_priv(host);
struct sdhci_tegra *tegra = sdhci_pltfm_priv(platform);
const struct sdhci_tegra_soc_data *soc = tegra->soc_data;
struct device *dev = mmc_dev(host->mmc);
if (soc->dma_mask)
return dma_set_mask_and_coherent(dev, soc->dma_mask);
return 0;
}
static const struct sdhci_ops tegra_sdhci_ops = {
.get_ro = tegra_sdhci_get_ro,
.read_w = tegra_sdhci_readw,
.write_l = tegra_sdhci_writel,
.set_clock = tegra_sdhci_set_clock,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.set_dma_mask = tegra_sdhci_set_dma_mask,
.set_bus_width = sdhci_set_bus_width,
.reset = tegra_sdhci_reset,
.platform_execute_tuning = tegra_sdhci_execute_tuning,
.set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
.voltage_switch = tegra_sdhci_voltage_switch,
.get_max_clock = tegra_sdhci_get_max_clock,
};
static const struct sdhci_pltfm_data sdhci_tegra20_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.ops = &tegra_sdhci_ops,
};
static const struct sdhci_tegra_soc_data soc_data_tegra20 = {
.pdata = &sdhci_tegra20_pdata,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.dma_mask = DMA_BIT_MASK(32),
.nvquirks = NVQUIRK_FORCE_SDHCI_SPEC_200 |
NVQUIRK_ENABLE_BLOCK_GAP_DET,
};
static const struct sdhci_pltfm_data sdhci_tegra30_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
SDHCI_QUIRK2_BROKEN_HS200 |
/*
* Auto-CMD23 leads to "Got command interrupt 0x00010000 even
* though no command operation was in progress."
*
* The exact reason is unknown, as the same hardware seems
* to support Auto CMD23 on a downstream 3.1 kernel.
*/
SDHCI_QUIRK2_ACMD23_BROKEN,
.ops = &tegra_sdhci_ops,
};
static const struct sdhci_tegra_soc_data soc_data_tegra30 = {
.pdata = &sdhci_tegra30_pdata,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.dma_mask = DMA_BIT_MASK(32),
.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104 |
NVQUIRK_HAS_PADCALIB,
};
static const struct sdhci_ops tegra114_sdhci_ops = {
.get_ro = tegra_sdhci_get_ro,
.read_w = tegra_sdhci_readw,
.write_w = tegra_sdhci_writew,
.write_l = tegra_sdhci_writel,
.set_clock = tegra_sdhci_set_clock,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.set_dma_mask = tegra_sdhci_set_dma_mask,
.set_bus_width = sdhci_set_bus_width,
.reset = tegra_sdhci_reset,
.platform_execute_tuning = tegra_sdhci_execute_tuning,
.set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
.voltage_switch = tegra_sdhci_voltage_switch,
.get_max_clock = tegra_sdhci_get_max_clock,
};
static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.ops = &tegra114_sdhci_ops,
};
static const struct sdhci_tegra_soc_data soc_data_tegra114 = {
.pdata = &sdhci_tegra114_pdata,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.dma_mask = DMA_BIT_MASK(32),
};
static const struct sdhci_pltfm_data sdhci_tegra124_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.ops = &tegra114_sdhci_ops,
};
static const struct sdhci_tegra_soc_data soc_data_tegra124 = {
.pdata = &sdhci_tegra124_pdata,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.dma_mask = DMA_BIT_MASK(34),
};
static const struct sdhci_ops tegra210_sdhci_ops = {
.get_ro = tegra_sdhci_get_ro,
.read_w = tegra_sdhci_readw,
.write_w = tegra210_sdhci_writew,
.write_l = tegra_sdhci_writel,
.set_clock = tegra_sdhci_set_clock,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.set_dma_mask = tegra_sdhci_set_dma_mask,
.set_bus_width = sdhci_set_bus_width,
.reset = tegra_sdhci_reset,
.set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
.voltage_switch = tegra_sdhci_voltage_switch,
.get_max_clock = tegra_sdhci_get_max_clock,
};
static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.ops = &tegra210_sdhci_ops,
};
static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
.pdata = &sdhci_tegra210_pdata,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.dma_mask = DMA_BIT_MASK(34),
.nvquirks = NVQUIRK_NEEDS_PAD_CONTROL |
NVQUIRK_HAS_PADCALIB |
NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104,
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
.min_tap_delay = 106,
.max_tap_delay = 185,
};
static const struct sdhci_ops tegra186_sdhci_ops = {
.get_ro = tegra_sdhci_get_ro,
.read_w = tegra_sdhci_readw,
.write_l = tegra_sdhci_writel,
.set_clock = tegra_sdhci_set_clock,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.set_dma_mask = tegra_sdhci_set_dma_mask,
.set_bus_width = sdhci_set_bus_width,
.reset = tegra_sdhci_reset,
.set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
.voltage_switch = tegra_sdhci_voltage_switch,
.get_max_clock = tegra_sdhci_get_max_clock,
.irq = sdhci_tegra_cqhci_irq,
};
static const struct sdhci_pltfm_data sdhci_tegra186_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
.ops = &tegra186_sdhci_ops,
};
static const struct sdhci_tegra_soc_data soc_data_tegra186 = {
.pdata = &sdhci_tegra186_pdata,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.dma_mask = DMA_BIT_MASK(40),
.nvquirks = NVQUIRK_NEEDS_PAD_CONTROL |
NVQUIRK_HAS_PADCALIB |
NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104 |
NVQUIRK_CQHCI_DCMD_R1B_CMD_TIMING,
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
.min_tap_delay = 84,
.max_tap_delay = 136,
};
static const struct sdhci_tegra_soc_data soc_data_tegra194 = {
.pdata = &sdhci_tegra186_pdata,
mmc: tegra: Implement ->set_dma_mask() The SDHCI controller on Tegra186 supports 40-bit addressing, which is usually enough to address all of system memory. However, if the SDHCI controller is behind an IOMMU, the address space can go beyond. This happens on Tegra186 and later where the ARM SMMU has an input address space of 48 bits. If the DMA API is backed by this ARM SMMU, the top- down IOVA allocator will cause IOV addresses to be returned that the SDHCI controller cannot access. Unfortunately, prior to the introduction of the ->set_dma_mask() host operation, the SDHCI core would set either a 64-bit DMA mask if the controller claimed to support 64-bit addressing, or a 32-bit DMA mask otherwise. Since the full 64 bits cannot be addressed on Tegra, this had to be worked around in commit 68481a7e1c84 ("mmc: tegra: Mark 64 bit dma broken on Tegra186") by setting the SDHCI_QUIRK2_BROKEN_64_BIT_DMA quirk, which effectively restricts the DMA mask to 32 bits. One disadvantage of this is that dma_map_*() APIs will now try to use the swiotlb to bounce DMA to addresses beyond of the controller's DMA mask. This in turn caused degraded performance and can lead to situations where the swiotlb buffer is exhausted, which in turn leads to DMA transfers to fail. With the recent introduction of the ->set_dma_mask() host operation, this can now be properly fixed. For each generation of Tegra, the exact supported DMA mask can be configured. This kills two birds with one stone: it avoids the use of bounce buffers because system memory never exceeds the addressable memory range of the SDHCI controllers on these devices, and at the same time when an IOMMU is involved, it prevents IOV addresses from being allocated beyond the addressible range of the controllers. Since the DMA mask is now properly handled, the 64-bit DMA quirk can be removed. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> [treding@nvidia.com: provide more background in commit message] Tested-by: Nicolin Chen <nicoleotsuka@gmail.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Thierry Reding <treding@nvidia.com> Cc: stable@vger.kernel.org # v4.15 + Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-09-23 18:08:10 +08:00
.dma_mask = DMA_BIT_MASK(39),
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
.nvquirks = NVQUIRK_NEEDS_PAD_CONTROL |
NVQUIRK_HAS_PADCALIB |
NVQUIRK_DIS_CARD_CLK_CONFIG_TAP |
NVQUIRK_ENABLE_SDR50 |
NVQUIRK_ENABLE_SDR104,
.min_tap_delay = 96,
.max_tap_delay = 139,
};
static const struct of_device_id sdhci_tegra_dt_match[] = {
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
{ .compatible = "nvidia,tegra194-sdhci", .data = &soc_data_tegra194 },
{ .compatible = "nvidia,tegra186-sdhci", .data = &soc_data_tegra186 },
{ .compatible = "nvidia,tegra210-sdhci", .data = &soc_data_tegra210 },
{ .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra124 },
{ .compatible = "nvidia,tegra114-sdhci", .data = &soc_data_tegra114 },
{ .compatible = "nvidia,tegra30-sdhci", .data = &soc_data_tegra30 },
{ .compatible = "nvidia,tegra20-sdhci", .data = &soc_data_tegra20 },
{}
};
MODULE_DEVICE_TABLE(of, sdhci_tegra_dt_match);
static int sdhci_tegra_add_host(struct sdhci_host *host)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
struct cqhci_host *cq_host;
bool dma64;
int ret;
if (!tegra_host->enable_hwcq)
return sdhci_add_host(host);
sdhci_enable_v4_mode(host);
ret = sdhci_setup_host(host);
if (ret)
return ret;
host->mmc->caps2 |= MMC_CAP2_CQE | MMC_CAP2_CQE_DCMD;
cq_host = devm_kzalloc(host->mmc->parent,
sizeof(*cq_host), GFP_KERNEL);
if (!cq_host) {
ret = -ENOMEM;
goto cleanup;
}
cq_host->mmio = host->ioaddr + SDHCI_TEGRA_CQE_BASE_ADDR;
cq_host->ops = &sdhci_tegra_cqhci_ops;
dma64 = host->flags & SDHCI_USE_64_BIT_DMA;
if (dma64)
cq_host->caps |= CQHCI_TASK_DESC_SZ_128;
ret = cqhci_init(cq_host, host->mmc, dma64);
if (ret)
goto cleanup;
ret = __sdhci_add_host(host);
if (ret)
goto cleanup;
return 0;
cleanup:
sdhci_cleanup_host(host);
return ret;
}
static int sdhci_tegra_probe(struct platform_device *pdev)
{
const struct of_device_id *match;
const struct sdhci_tegra_soc_data *soc_data;
struct sdhci_host *host;
struct sdhci_pltfm_host *pltfm_host;
struct sdhci_tegra *tegra_host;
struct clk *clk;
int rc;
match = of_match_device(sdhci_tegra_dt_match, &pdev->dev);
if (!match)
return -EINVAL;
soc_data = match->data;
host = sdhci_pltfm_init(pdev, soc_data->pdata, sizeof(*tegra_host));
if (IS_ERR(host))
return PTR_ERR(host);
pltfm_host = sdhci_priv(host);
tegra_host = sdhci_pltfm_priv(pltfm_host);
tegra_host->ddr_signaling = false;
tegra_host->pad_calib_required = false;
tegra_host->pad_control_available = false;
tegra_host->soc_data = soc_data;
if (soc_data->nvquirks & NVQUIRK_NEEDS_PAD_CONTROL) {
rc = tegra_sdhci_init_pinctrl_info(&pdev->dev, tegra_host);
if (rc == 0)
host->mmc_host_ops.start_signal_voltage_switch =
sdhci_tegra_start_signal_voltage_switch;
}
/* Hook to periodically rerun pad calibration */
if (soc_data->nvquirks & NVQUIRK_HAS_PADCALIB)
host->mmc_host_ops.request = tegra_sdhci_request;
host->mmc_host_ops.hs400_enhanced_strobe =
tegra_sdhci_hs400_enhanced_strobe;
mmc: tegra: update hw tuning process This patch includes below HW tuning related fixes. configures tuning parameters as per Tegra TRM WAR fix for manual tap change HW auto-tuning post process As per Tegra TRM, SDR50 mode tuning execution takes upto maximum of 256 tuning iterations and SDR104/HS200/HS400 modes tuning execution takes upto maximum of 128 tuning iterations. This patch programs tuning control register with maximum tuning iterations needed based on the timing along with the start tap, multiplier, and step size used by the HW tuning. Tegra210 has a known issue of glitch on trimmer output when the tap value is changed with the trimmer input clock running and the WAR is to disable card clock before sending tuning command and after sending tuning command wait for 1usec and issue SW reset followed by enabling card clock. This WAR is applicable when changing tap value manually as well. Tegra SDHCI driver has this implemented correctly for manual tap change but missing SW reset before enabling card clock during sending tuning command. Issuing SW reset during tuning command as a part of WAR and is applicable in cases where tuning is performed with single step size for more iterations. This patch includes this fix. HW auto-tuning finds the best largest passing window and sets the tap at the middle of the window. With some devices like sandisk eMMC driving fast edges and due to high tap to tap delay in the Tegra chipset, auto-tuning does not detect falling tap between the valid windows resulting in a parital window or a merged window and the best tap is set at the signal transition which is actually the worst tap location. Recommended SW solution is to detect if the best passing window picked by the HW tuning is a partial or a merged window based on min and max tap delays found from chip characterization across PVT and perform tuning correction to pick the best tap. This patch has implementation of this post HW tuning process for the tegra hosts that support HW tuning through the callback function tegra_sdhci_execute_hw_tuning and uses the tuned tap delay. Tested-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Sowjanya Komatineni <skomatineni@nvidia.com> Acked-by: Adrian Hunter <adrian.hunter@intel.com> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
2019-03-24 12:45:20 +08:00
if (!host->ops->platform_execute_tuning)
host->mmc_host_ops.execute_tuning =
tegra_sdhci_execute_hw_tuning;
rc = mmc_of_parse(host->mmc);
if (rc)
goto err_parse_dt;
if (tegra_host->soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
host->mmc->caps |= MMC_CAP_1_8V_DDR;
tegra_sdhci_parse_dt(host);
tegra_host->power_gpio = devm_gpiod_get_optional(&pdev->dev, "power",
GPIOD_OUT_HIGH);
if (IS_ERR(tegra_host->power_gpio)) {
rc = PTR_ERR(tegra_host->power_gpio);
goto err_power_req;
}
clk = devm_clk_get(mmc_dev(host->mmc), NULL);
if (IS_ERR(clk)) {
rc = PTR_ERR(clk);
if (rc != -EPROBE_DEFER)
dev_err(&pdev->dev, "failed to get clock: %d\n", rc);
goto err_clk_get;
}
clk_prepare_enable(clk);
pltfm_host->clk = clk;
tegra_host->rst = devm_reset_control_get_exclusive(&pdev->dev,
"sdhci");
if (IS_ERR(tegra_host->rst)) {
rc = PTR_ERR(tegra_host->rst);
dev_err(&pdev->dev, "failed to get reset control: %d\n", rc);
goto err_rst_get;
}
rc = reset_control_assert(tegra_host->rst);
if (rc)
goto err_rst_get;
usleep_range(2000, 4000);
rc = reset_control_deassert(tegra_host->rst);
if (rc)
goto err_rst_get;
usleep_range(2000, 4000);
rc = sdhci_tegra_add_host(host);
if (rc)
goto err_add_host;
return 0;
err_add_host:
reset_control_assert(tegra_host->rst);
err_rst_get:
clk_disable_unprepare(pltfm_host->clk);
err_clk_get:
err_power_req:
err_parse_dt:
sdhci_pltfm_free(pdev);
return rc;
}
static int sdhci_tegra_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
sdhci_remove_host(host, 0);
reset_control_assert(tegra_host->rst);
usleep_range(2000, 4000);
clk_disable_unprepare(pltfm_host->clk);
sdhci_pltfm_free(pdev);
return 0;
}
#ifdef CONFIG_PM_SLEEP
static int __maybe_unused sdhci_tegra_suspend(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
int ret;
if (host->mmc->caps2 & MMC_CAP2_CQE) {
ret = cqhci_suspend(host->mmc);
if (ret)
return ret;
}
ret = sdhci_suspend_host(host);
if (ret) {
cqhci_resume(host->mmc);
return ret;
}
clk_disable_unprepare(pltfm_host->clk);
return 0;
}
static int __maybe_unused sdhci_tegra_resume(struct device *dev)
{
struct sdhci_host *host = dev_get_drvdata(dev);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
int ret;
ret = clk_prepare_enable(pltfm_host->clk);
if (ret)
return ret;
ret = sdhci_resume_host(host);
if (ret)
goto disable_clk;
if (host->mmc->caps2 & MMC_CAP2_CQE) {
ret = cqhci_resume(host->mmc);
if (ret)
goto suspend_host;
}
return 0;
suspend_host:
sdhci_suspend_host(host);
disable_clk:
clk_disable_unprepare(pltfm_host->clk);
return ret;
}
#endif
static SIMPLE_DEV_PM_OPS(sdhci_tegra_dev_pm_ops, sdhci_tegra_suspend,
sdhci_tegra_resume);
static struct platform_driver sdhci_tegra_driver = {
.driver = {
.name = "sdhci-tegra",
.of_match_table = sdhci_tegra_dt_match,
.pm = &sdhci_tegra_dev_pm_ops,
},
.probe = sdhci_tegra_probe,
.remove = sdhci_tegra_remove,
};
module_platform_driver(sdhci_tegra_driver);
MODULE_DESCRIPTION("SDHCI driver for Tegra");
MODULE_AUTHOR("Google, Inc.");
MODULE_LICENSE("GPL v2");