Merge branch 'stmmac-10GbE-using-XGMAC'

Jose Abreu says:

====================
net: stmmac: 10GbE using XGMAC

Support for 10Gb links using the XGMAC core, plus some performance tweaks.

Tested in a PCI-based setup.

iperf3 TCP results:
	TSO ON, MTU=1500, TX Queues = 1, RX Queues = 1, Flow Control ON
	Pinned CPU (-A), Zero-Copy (-Z)

[ ID] Interval           Transfer     Bitrate         Retr
[  5]   0.00-600.00 sec   643 GBytes  9.21 Gbits/sec    1             sender
[  5]   0.00-600.00 sec   643 GBytes  9.21 Gbits/sec                  receiver
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 2019-06-28 09:23:39 -07:00
commit 8792e82ddb
7 changed files with 176 additions and 56 deletions

drivers/net/ethernet/stmicro/stmmac/Kconfig

@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config STMMAC_ETH
tristate "STMicroelectronics 10/100/1000/EQOS Ethernet driver"
tristate "STMicroelectronics 10/100/1000/EQOS/2500/5000/10000 Ethernet driver"
depends on HAS_IOMEM && HAS_DMA
select MII
select PHYLINK

drivers/net/ethernet/stmicro/stmmac/common.h

@@ -246,7 +246,7 @@ struct stmmac_safety_stats {
/* Max/Min RI Watchdog Timer count value */
#define MAX_DMA_RIWT 0xff
-#define MIN_DMA_RIWT 0x20
+#define MIN_DMA_RIWT 0x10
/* Tx coalesce parameters */
#define STMMAC_COAL_TX_TIMER 1000
#define STMMAC_MAX_COAL_TX_TICK 100000
@@ -351,6 +351,7 @@ struct dma_features {
unsigned int frpsel;
unsigned int frpbs;
unsigned int frpes;
+unsigned int addr64;
};
/* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -392,8 +393,12 @@ struct mac_link {
u32 speed100;
u32 speed1000;
u32 speed2500;
-u32 speed10000;
u32 duplex;
+struct {
+u32 speed2500;
+u32 speed5000;
+u32 speed10000;
+} xgmii;
};
struct mii_regs {

drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h

@@ -15,10 +15,14 @@
/* MAC Registers */
#define XGMAC_TX_CONFIG 0x00000000
#define XGMAC_CONFIG_SS_OFF 29
-#define XGMAC_CONFIG_SS_MASK GENMASK(30, 29)
+#define XGMAC_CONFIG_SS_MASK GENMASK(31, 29)
#define XGMAC_CONFIG_SS_10000 (0x0 << XGMAC_CONFIG_SS_OFF)
-#define XGMAC_CONFIG_SS_2500 (0x2 << XGMAC_CONFIG_SS_OFF)
-#define XGMAC_CONFIG_SS_1000 (0x3 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_2500_GMII (0x2 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_1000_GMII (0x3 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_100_MII (0x4 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_5000 (0x5 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_2500 (0x6 << XGMAC_CONFIG_SS_OFF)
+#define XGMAC_CONFIG_SS_10_MII (0x7 << XGMAC_CONFIG_SS_OFF)
#define XGMAC_CONFIG_SARC GENMASK(22, 20)
#define XGMAC_CONFIG_SARC_SHIFT 20
#define XGMAC_CONFIG_JD BIT(16)
@@ -83,6 +87,7 @@
#define XGMAC_HWFEAT_GMIISEL BIT(1)
#define XGMAC_HW_FEATURE1 0x00000120
#define XGMAC_HWFEAT_TSOEN BIT(18)
+#define XGMAC_HWFEAT_ADDR64 GENMASK(15, 14)
#define XGMAC_HWFEAT_TXFIFOSIZE GENMASK(10, 6)
#define XGMAC_HWFEAT_RXFIFOSIZE GENMASK(4, 0)
#define XGMAC_HW_FEATURE2 0x00000124
@@ -168,6 +173,7 @@
#define XGMAC_EN_LPI BIT(15)
#define XGMAC_LPI_XIT_PKT BIT(14)
#define XGMAC_AAL BIT(12)
+#define XGMAC_EAME BIT(11)
#define XGMAC_BLEN GENMASK(7, 1)
#define XGMAC_BLEN256 BIT(7)
#define XGMAC_BLEN128 BIT(6)
@@ -177,6 +183,10 @@
#define XGMAC_BLEN8 BIT(2)
#define XGMAC_BLEN4 BIT(1)
#define XGMAC_UNDEF BIT(0)
+#define XGMAC_TX_EDMA_CTRL 0x00003040
+#define XGMAC_TDPS GENMASK(29, 0)
+#define XGMAC_RX_EDMA_CTRL 0x00003044
+#define XGMAC_RDPS GENMASK(29, 0)
#define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x)))
#define XGMAC_PBLx8 BIT(16)
#define XGMAC_DMA_CH_TX_CONTROL(x) (0x00003104 + (0x80 * (x)))
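
Note (not part of the patch): the speed-select mask widens from GENMASK(30, 29) to GENMASK(31, 29) because the new encodings (0x4 through 0x7) need a third bit. A rough sketch of how one of these encodings would be applied to the TX configuration register with the defines above (the helper name and its ioaddr argument are hypothetical):

#include <linux/io.h>

/* Sketch only: select the 10G speed encoding via read-modify-write.
 * ioaddr is assumed to be the mapped XGMAC register base.
 */
static void xgmac_select_speed_10g(void __iomem *ioaddr)
{
	u32 tx = readl(ioaddr + XGMAC_TX_CONFIG);

	tx &= ~XGMAC_CONFIG_SS_MASK;	/* clear speed-select bits 31:29 */
	tx |= XGMAC_CONFIG_SS_10000;	/* 0x0 << 29 selects 10G */
	writel(tx, ioaddr + XGMAC_TX_CONFIG);
}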

drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c

@@ -36,7 +36,7 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
switch (hw->ps) {
case SPEED_10000:
-tx |= hw->link.speed10000;
+tx |= hw->link.xgmii.speed10000;
break;
case SPEED_2500:
tx |= hw->link.speed2500;
@@ -381,11 +381,13 @@ int dwxgmac2_setup(struct stmmac_priv *priv)
mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
mac->link.duplex = 0;
-mac->link.speed10 = 0;
-mac->link.speed100 = 0;
-mac->link.speed1000 = XGMAC_CONFIG_SS_1000;
-mac->link.speed2500 = XGMAC_CONFIG_SS_2500;
-mac->link.speed10000 = XGMAC_CONFIG_SS_10000;
+mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
+mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
+mac->link.speed1000 = XGMAC_CONFIG_SS_1000_GMII;
+mac->link.speed2500 = XGMAC_CONFIG_SS_2500_GMII;
+mac->link.xgmii.speed2500 = XGMAC_CONFIG_SS_2500;
+mac->link.xgmii.speed5000 = XGMAC_CONFIG_SS_5000;
+mac->link.xgmii.speed10000 = XGMAC_CONFIG_SS_10000;
mac->link.speed_mask = XGMAC_CONFIG_SS_MASK;
mac->mii.addr = XGMAC_MDIO_ADDR;

drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c

@@ -242,8 +242,8 @@ static void dwxgmac2_get_addr(struct dma_desc *p, unsigned int *addr)
static void dwxgmac2_set_addr(struct dma_desc *p, dma_addr_t addr)
{
-p->des0 = cpu_to_le32(addr);
-p->des1 = 0;
+p->des0 = cpu_to_le32(lower_32_bits(addr));
+p->des1 = cpu_to_le32(upper_32_bits(addr));
}
static void dwxgmac2_clear(struct dma_desc *p)
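
For illustration only (not taken from the patch): with the change above, a bus address wider than 32 bits is split across the two 32-bit descriptor words. A minimal sketch with a worked example value, reusing the driver's struct dma_desc:

/* Sketch: how a wide DMA address maps onto des0/des1.
 * For addr = 0x0000123456789abc:
 *   lower_32_bits(addr) = 0x56789abc  -> des0
 *   upper_32_bits(addr) = 0x00001234  -> des1
 */
static void example_split_addr(struct dma_desc *p, dma_addr_t addr)
{
	p->des0 = cpu_to_le32(lower_32_bits(addr));
	p->des1 = cpu_to_le32(upper_32_bits(addr));
}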

drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c

@@ -27,7 +27,7 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr,
if (dma_cfg->aal)
value |= XGMAC_AAL;
-writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+writel(value | XGMAC_EAME, ioaddr + XGMAC_DMA_SYSBUS_MODE);
}
static void dwxgmac2_dma_init_chan(void __iomem *ioaddr,
@@ -91,11 +91,11 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
value |= (axi->axi_rd_osr_lmt << XGMAC_RD_OSR_LMT_SHIFT) &
XGMAC_RD_OSR_LMT;
if (!axi->axi_fb)
value |= XGMAC_UNDEF;
value &= ~XGMAC_BLEN;
for (i = 0; i < AXI_BLEN; i++) {
if (axi->axi_blen[i])
value &= ~XGMAC_UNDEF;
switch (axi->axi_blen[i]) {
case 256:
value |= XGMAC_BLEN256;
@@ -122,6 +122,8 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
}
writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE);
+writel(XGMAC_TDPS, ioaddr + XGMAC_TX_EDMA_CTRL);
+writel(XGMAC_RDPS, ioaddr + XGMAC_RX_EDMA_CTRL);
}
static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode,
@@ -359,6 +361,23 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
/* MAC HW feature 1 */
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
+dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14;
+switch (dma_cap->addr64) {
+case 0:
+dma_cap->addr64 = 32;
+break;
+case 1:
+dma_cap->addr64 = 40;
+break;
+case 2:
+dma_cap->addr64 = 48;
+break;
+default:
+dma_cap->addr64 = 32;
+break;
+}
dma_cap->tx_fifo_size =
128 << ((hw_cap & XGMAC_HWFEAT_TXFIFOSIZE) >> 6);
dma_cap->rx_fifo_size =

drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

@@ -805,14 +805,43 @@ static void stmmac_validate(struct phylink_config *config,
struct phylink_link_state *state)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
+__ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, };
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
int tx_cnt = priv->plat->tx_queues_to_use;
int max_speed = priv->plat->max_speed;
+phylink_set(mac_supported, 10baseT_Half);
+phylink_set(mac_supported, 10baseT_Full);
+phylink_set(mac_supported, 100baseT_Half);
+phylink_set(mac_supported, 100baseT_Full);
+phylink_set(mac_supported, Autoneg);
+phylink_set(mac_supported, Pause);
+phylink_set(mac_supported, Asym_Pause);
+phylink_set_port_modes(mac_supported);
+if (priv->plat->has_gmac ||
+priv->plat->has_gmac4 ||
+priv->plat->has_xgmac) {
+phylink_set(mac_supported, 1000baseT_Half);
+phylink_set(mac_supported, 1000baseT_Full);
+phylink_set(mac_supported, 1000baseKX_Full);
+}
/* Cut down 1G if asked to */
if ((max_speed > 0) && (max_speed < 1000)) {
phylink_set(mask, 1000baseT_Full);
phylink_set(mask, 1000baseX_Full);
+} else if (priv->plat->has_xgmac) {
+phylink_set(mac_supported, 2500baseT_Full);
+phylink_set(mac_supported, 5000baseT_Full);
+phylink_set(mac_supported, 10000baseSR_Full);
+phylink_set(mac_supported, 10000baseLR_Full);
+phylink_set(mac_supported, 10000baseER_Full);
+phylink_set(mac_supported, 10000baseLRM_Full);
+phylink_set(mac_supported, 10000baseT_Full);
+phylink_set(mac_supported, 10000baseKX4_Full);
+phylink_set(mac_supported, 10000baseKR_Full);
}
/* Half-Duplex can only work with single queue */
@@ -822,7 +851,12 @@ static void stmmac_validate(struct phylink_config *config,
phylink_set(mask, 1000baseT_Half);
}
-bitmap_andnot(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
+bitmap_and(supported, supported, mac_supported,
+__ETHTOOL_LINK_MODE_MASK_NBITS);
+bitmap_andnot(supported, supported, mask,
+__ETHTOOL_LINK_MODE_MASK_NBITS);
+bitmap_and(state->advertising, state->advertising, mac_supported,
+__ETHTOOL_LINK_MODE_MASK_NBITS);
bitmap_andnot(state->advertising, state->advertising, mask,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
@@ -842,18 +876,37 @@ static void stmmac_mac_config(struct phylink_config *config, unsigned int mode,
ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
ctrl &= ~priv->hw->link.speed_mask;
-switch (state->speed) {
-case SPEED_1000:
-ctrl |= priv->hw->link.speed1000;
-break;
-case SPEED_100:
-ctrl |= priv->hw->link.speed100;
-break;
-case SPEED_10:
-ctrl |= priv->hw->link.speed10;
-break;
-default:
-return;
+if (state->interface == PHY_INTERFACE_MODE_USXGMII) {
+switch (state->speed) {
+case SPEED_10000:
+ctrl |= priv->hw->link.xgmii.speed10000;
+break;
+case SPEED_5000:
+ctrl |= priv->hw->link.xgmii.speed5000;
+break;
+case SPEED_2500:
+ctrl |= priv->hw->link.xgmii.speed2500;
+break;
+default:
+return;
+}
+} else {
+switch (state->speed) {
+case SPEED_2500:
+ctrl |= priv->hw->link.speed2500;
+break;
+case SPEED_1000:
+ctrl |= priv->hw->link.speed1000;
+break;
+case SPEED_100:
+ctrl |= priv->hw->link.speed100;
+break;
+case SPEED_10:
+ctrl |= priv->hw->link.speed10;
+break;
+default:
+return;
+}
}
priv->speed = state->speed;
@@ -896,7 +949,7 @@ static void stmmac_mac_link_up(struct phylink_config *config,
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
stmmac_mac_set(priv, priv->ioaddr, true);
-if (phy) {
+if (phy && priv->dma_cap.eee) {
priv->eee_active = phy_init_eee(phy, 1) >= 0;
priv->eee_enabled = stmmac_eee_init(priv);
stmmac_set_eee_pls(priv, priv->hw, true);
@@ -2000,18 +2053,16 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
&priv->xstats, chan);
struct stmmac_channel *ch = &priv->channel[chan];
-if (status)
-status |= handle_rx | handle_tx;
if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
-stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
-napi_schedule_irqoff(&ch->rx_napi);
+if (napi_schedule_prep(&ch->rx_napi)) {
+stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+__napi_schedule_irqoff(&ch->rx_napi);
+status |= handle_tx;
+}
}
-if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
-stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use))
napi_schedule_irqoff(&ch->tx_napi);
-}
return status;
}
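
For context on the hunk above: napi_schedule_prep() returns true only when NAPI is not already scheduled or running, so the DMA interrupt is now masked only when the RX poller actually gets (re)started, and a successful RX schedule also sets handle_tx so the TX poller below is kicked as well. A generic sketch of the prep/schedule pattern (the channel struct and disable_device_irq() are placeholders, not stmmac code):

#include <linux/netdevice.h>

struct example_chan {
	struct napi_struct rx_napi;
	/* device-specific fields omitted */
};

/* Placeholder for the hardware-specific interrupt masking routine. */
static void disable_device_irq(struct example_chan *ch) { }

static void example_rx_irq_event(struct example_chan *ch)
{
	/* Mask the device IRQ only if this call actually schedules NAPI;
	 * napi_schedule_prep() fails when the poller is already running.
	 */
	if (napi_schedule_prep(&ch->rx_napi)) {
		disable_device_irq(ch);
		__napi_schedule_irqoff(&ch->rx_napi);
	}
}
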
@@ -2516,9 +2567,9 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
if (priv->use_riwt) {
-ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt);
+ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt);
if (!ret)
-priv->rx_riwt = MAX_DMA_RIWT;
+priv->rx_riwt = MIN_DMA_RIWT;
}
if (priv->hw->pcs)
@@ -2721,7 +2772,7 @@ static int stmmac_release(struct net_device *dev)
* This function fills descriptor and request new descriptors according to
* buffer length to fill
*/
-static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
+static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des,
int total_len, bool last_segment, u32 queue)
{
struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
@@ -2732,11 +2783,18 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
tmp_len = total_len;
while (tmp_len > 0) {
+dma_addr_t curr_addr;
tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE);
WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]);
desc = tx_q->dma_tx + tx_q->cur_tx;
-desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
+curr_addr = des + (total_len - tmp_len);
+if (priv->dma_cap.addr64 <= 32)
+desc->des0 = cpu_to_le32(curr_addr);
+else
+stmmac_set_desc_addr(priv, desc, curr_addr);
buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
TSO_MAX_BUFF_SIZE : tmp_len;
@@ -2782,11 +2840,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
struct stmmac_priv *priv = netdev_priv(dev);
int nfrags = skb_shinfo(skb)->nr_frags;
u32 queue = skb_get_queue_mapping(skb);
-unsigned int first_entry, des;
+unsigned int first_entry;
struct stmmac_tx_queue *tx_q;
int tmp_pay_len = 0;
u32 pay_len, mss;
u8 proto_hdr_len;
+dma_addr_t des;
int i;
tx_q = &priv->tx_queue[queue];
@@ -2843,14 +2902,19 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_skbuff_dma[first_entry].buf = des;
tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
-first->des0 = cpu_to_le32(des);
+if (priv->dma_cap.addr64 <= 32) {
+first->des0 = cpu_to_le32(des);
-/* Fill start of payload in buff2 of first descriptor */
-if (pay_len)
-first->des1 = cpu_to_le32(des + proto_hdr_len);
+/* Fill start of payload in buff2 of first descriptor */
+if (pay_len)
+first->des1 = cpu_to_le32(des + proto_hdr_len);
-/* If needed take extra descriptors to fill the remaining payload */
-tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
+/* If needed take extra descriptors to fill the remaining payload */
+tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
+} else {
+stmmac_set_desc_addr(priv, first, des);
+tmp_pay_len = pay_len;
+}
stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
@@ -2980,12 +3044,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
int i, csum_insertion = 0, is_jumbo = 0;
u32 queue = skb_get_queue_mapping(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
-int entry;
-unsigned int first_entry;
struct dma_desc *desc, *first;
struct stmmac_tx_queue *tx_q;
+unsigned int first_entry;
unsigned int enh_desc;
-unsigned int des;
+dma_addr_t des;
+int entry;
tx_q = &priv->tx_queue[queue];
@@ -3283,6 +3347,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE);
}
rx_q->dirty_rx = entry;
+rx_q->rx_tail_addr = rx_q->dma_rx_phy +
+(rx_q->dirty_rx * sizeof(struct dma_desc));
stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue);
}
@@ -3517,8 +3583,8 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
work_done = min(work_done, budget);
-if (work_done < budget && napi_complete_done(napi, work_done))
-stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+if (work_done < budget)
+napi_complete_done(napi, work_done);
/* Force transmission restart */
tx_q = &priv->tx_queue[chan];
@@ -4265,6 +4331,24 @@ int stmmac_dvr_probe(struct device *device,
priv->tso = true;
dev_info(priv->device, "TSO feature enabled\n");
}
+if (priv->dma_cap.addr64) {
+ret = dma_set_mask_and_coherent(device,
+DMA_BIT_MASK(priv->dma_cap.addr64));
+if (!ret) {
+dev_info(priv->device, "Using %d bits DMA width\n",
+priv->dma_cap.addr64);
+} else {
+ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32));
+if (ret) {
+dev_err(priv->device, "Failed to set DMA Mask\n");
+goto error_hw_init;
+}
+priv->dma_cap.addr64 = 32;
+}
+}
ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA;
ndev->watchdog_timeo = msecs_to_jiffies(watchdog);
#ifdef STMMAC_VLAN_TAG_USED