Merge branch 'nfp-update-TX-path-to-enable-repr-offloads'

Jakub Kicinski says:

====================
nfp: update TX path to enable repr offloads

This set starts with three micro optimizations to the TX path.
The improvement is measurable, but below 1% of CPU utilization.

Patches 4 - 9 add basic TX offloads to representor devices, like
checksum offload or TSO, and remove the unnecessary TX lock and
Qdisc (our representors are software constructs on top of the PF).

The last 2 patches add more info to error messages - id of command
which failed and exact location of incorrect TLVs, very useful for
debugging.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-11-30 13:30:45 -08:00
commit 74315c393f
8 changed files with 214 additions and 38 deletions

View File

@ -126,7 +126,9 @@ nfp_abm_spawn_repr(struct nfp_app *app, struct nfp_abm_link *alink,
reprs = nfp_reprs_get_locked(app, rtype);
WARN(nfp_repr_get_locked(app, reprs, alink->id), "duplicate repr");
rtnl_lock();
rcu_assign_pointer(reprs->reprs[alink->id], netdev);
rtnl_unlock();
nfp_info(app->cpp, "%s Port %d Representor(%s) created\n",
ptype == NFP_PORT_PF_PORT ? "PCIe" : "Phys",
@ -152,7 +154,9 @@ nfp_abm_kill_repr(struct nfp_app *app, struct nfp_abm_link *alink,
netdev = nfp_repr_get_locked(app, reprs, alink->id);
if (!netdev)
return;
rtnl_lock();
rcu_assign_pointer(reprs->reprs[alink->id], NULL);
rtnl_unlock();
synchronize_rcu();
/* Cast to make sure nfp_repr_clean_and_free() takes a nfp_repr */
nfp_repr_clean_and_free((struct nfp_repr *)netdev_priv(netdev));

View File

@ -131,11 +131,45 @@ nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type,
struct nfp_reprs *old;
old = nfp_reprs_get_locked(app, type);
rtnl_lock();
rcu_assign_pointer(app->reprs[type], reprs);
rtnl_unlock();
return old;
}
static void
nfp_app_netdev_feat_change(struct nfp_app *app, struct net_device *netdev)
{
struct nfp_net *nn;
unsigned int type;
if (!nfp_netdev_is_nfp_net(netdev))
return;
nn = netdev_priv(netdev);
if (nn->app != app)
return;
for (type = 0; type < __NFP_REPR_TYPE_MAX; type++) {
struct nfp_reprs *reprs;
unsigned int i;
reprs = rtnl_dereference(app->reprs[type]);
if (!reprs)
continue;
for (i = 0; i < reprs->num_reprs; i++) {
struct net_device *repr;
repr = rtnl_dereference(reprs->reprs[i]);
if (!repr)
continue;
nfp_repr_transfer_features(repr, netdev);
}
}
}
static int
nfp_app_netdev_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
@ -145,6 +179,14 @@ nfp_app_netdev_event(struct notifier_block *nb, unsigned long event, void *ptr)
netdev = netdev_notifier_info_to_dev(ptr);
app = container_of(nb, struct nfp_app, netdev_nb);
/* Handle events common code is interested in */
switch (event) {
case NETDEV_FEAT_CHANGE:
nfp_app_netdev_feat_change(app, netdev);
break;
}
/* Call offload specific handlers */
if (app->type->netdev_event)
return app->type->netdev_event(app, netdev, event, ptr);
return NOTIFY_DONE;

View File

@ -158,6 +158,7 @@ struct nfp_net_tx_desc {
__le16 data_len; /* Length of frame + meta data */
} __packed;
__le32 vals[4];
__le64 vals8[2];
};
};
@ -543,6 +544,7 @@ struct nfp_net_dp {
* @reconfig_timer_active: Timer for reading reconfiguration results is pending
* @reconfig_sync_present: Some thread is performing synchronous reconfig
* @reconfig_timer: Timer for async reading of reconfig results
* @reconfig_in_progress_update: Update FW is processing now (debug only)
* @link_up: Is the link up?
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading
* @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter
@ -611,6 +613,7 @@ struct nfp_net {
bool reconfig_timer_active;
bool reconfig_sync_present;
struct timer_list reconfig_timer;
u32 reconfig_in_progress_update;
u32 rx_coalesce_usecs;
u32 rx_coalesce_max_frames;

View File

@ -101,6 +101,7 @@ static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update)
/* ensure update is written before pinging HW */
nn_pci_flush(nn);
nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
nn->reconfig_in_progress_update = update;
}
/* Pass 0 as update to run posted reconfigs. */
@ -123,10 +124,14 @@ static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
if (reg == 0)
return true;
if (reg & NFP_NET_CFG_UPDATE_ERR) {
nn_err(nn, "Reconfig error: 0x%08x\n", reg);
nn_err(nn, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
reg, nn->reconfig_in_progress_update,
nn_readl(nn, NFP_NET_CFG_CTRL));
return true;
} else if (last_check) {
nn_err(nn, "Reconfig timeout: 0x%08x\n", reg);
nn_err(nn, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n",
reg, nn->reconfig_in_progress_update,
nn_readl(nn, NFP_NET_CFG_CTRL));
return true;
}
@ -647,27 +652,29 @@ static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
* @txbuf: Pointer to driver soft TX descriptor
* @txd: Pointer to HW TX descriptor
* @skb: Pointer to SKB
* @md_bytes: Prepend length
*
* Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
* Return error on packet header greater than maximum supported LSO header size.
*/
static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
struct nfp_net_tx_buf *txbuf,
struct nfp_net_tx_desc *txd, struct sk_buff *skb)
struct nfp_net_tx_desc *txd, struct sk_buff *skb,
u32 md_bytes)
{
u32 hdrlen;
u32 l3_offset, l4_offset, hdrlen;
u16 mss;
if (!skb_is_gso(skb))
return;
if (!skb->encapsulation) {
txd->l3_offset = skb_network_offset(skb);
txd->l4_offset = skb_transport_offset(skb);
l3_offset = skb_network_offset(skb);
l4_offset = skb_transport_offset(skb);
hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
} else {
txd->l3_offset = skb_inner_network_offset(skb);
txd->l4_offset = skb_inner_transport_offset(skb);
l3_offset = skb_inner_network_offset(skb);
l4_offset = skb_inner_transport_offset(skb);
hdrlen = skb_inner_transport_header(skb) - skb->data +
inner_tcp_hdrlen(skb);
}
@ -676,7 +683,9 @@ static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);
mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
txd->lso_hdrlen = hdrlen;
txd->l3_offset = l3_offset - md_bytes;
txd->l4_offset = l4_offset - md_bytes;
txd->lso_hdrlen = hdrlen - md_bytes;
txd->mss = cpu_to_le16(mss);
txd->flags |= PCIE_DESC_TX_LSO;
@ -786,11 +795,11 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
{
struct nfp_net *nn = netdev_priv(netdev);
const struct skb_frag_struct *frag;
struct nfp_net_tx_desc *txd, txdg;
int f, nr_frags, wr_idx, md_bytes;
struct nfp_net_tx_ring *tx_ring;
struct nfp_net_r_vector *r_vec;
struct nfp_net_tx_buf *txbuf;
struct nfp_net_tx_desc *txd;
struct netdev_queue *nd_q;
struct nfp_net_dp *dp;
dma_addr_t dma_addr;
@ -801,13 +810,13 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
qidx = skb_get_queue_mapping(skb);
tx_ring = &dp->tx_rings[qidx];
r_vec = tx_ring->r_vec;
nd_q = netdev_get_tx_queue(dp->netdev, qidx);
nr_frags = skb_shinfo(skb)->nr_frags;
if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
qidx, tx_ring->wr_p, tx_ring->rd_p);
nd_q = netdev_get_tx_queue(dp->netdev, qidx);
netif_tx_stop_queue(nd_q);
nfp_net_tx_xmit_more_flush(tx_ring);
u64_stats_update_begin(&r_vec->tx_sync);
@ -851,7 +860,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
txd->lso_hdrlen = 0;
/* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
nfp_net_tx_tso(r_vec, txbuf, txd, skb);
nfp_net_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
txd->flags |= PCIE_DESC_TX_VLAN;
@ -860,8 +869,10 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
/* Gather DMA */
if (nr_frags > 0) {
__le64 second_half;
/* all descs must match except for in addr, length and eop */
txdg = *txd;
second_half = txd->vals8[1];
for (f = 0; f < nr_frags; f++) {
frag = &skb_shinfo(skb)->frags[f];
@ -878,11 +889,11 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
tx_ring->txbufs[wr_idx].fidx = f;
txd = &tx_ring->txds[wr_idx];
*txd = txdg;
txd->dma_len = cpu_to_le16(fsize);
nfp_desc_set_dma_addr(txd, dma_addr);
txd->offset_eop |=
(f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0;
txd->offset_eop = md_bytes |
((f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0);
txd->vals8[1] = second_half;
}
u64_stats_update_begin(&r_vec->tx_sync);
@ -892,6 +903,8 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
skb_tx_timestamp(skb);
nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
tx_ring->wr_p += nr_frags + 1;
if (nfp_net_tx_ring_should_stop(tx_ring))
nfp_net_tx_ring_stop(nd_q, tx_ring);
@ -938,14 +951,10 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
const struct skb_frag_struct *frag;
struct netdev_queue *nd_q;
u32 done_pkts = 0, done_bytes = 0;
struct sk_buff *skb;
int todo, nr_frags;
u32 qcp_rd_p;
int fidx;
int idx;
int todo;
if (tx_ring->wr_p == tx_ring->rd_p)
return;
@ -959,26 +968,33 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
while (todo--) {
idx = D_IDX(tx_ring, tx_ring->rd_p++);
const struct skb_frag_struct *frag;
struct nfp_net_tx_buf *tx_buf;
struct sk_buff *skb;
int fidx, nr_frags;
int idx;
skb = tx_ring->txbufs[idx].skb;
idx = D_IDX(tx_ring, tx_ring->rd_p++);
tx_buf = &tx_ring->txbufs[idx];
skb = tx_buf->skb;
if (!skb)
continue;
nr_frags = skb_shinfo(skb)->nr_frags;
fidx = tx_ring->txbufs[idx].fidx;
fidx = tx_buf->fidx;
if (fidx == -1) {
/* unmap head */
dma_unmap_single(dp->dev, tx_ring->txbufs[idx].dma_addr,
dma_unmap_single(dp->dev, tx_buf->dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
done_pkts += tx_ring->txbufs[idx].pkt_cnt;
done_bytes += tx_ring->txbufs[idx].real_len;
done_pkts += tx_buf->pkt_cnt;
done_bytes += tx_buf->real_len;
} else {
/* unmap fragment */
frag = &skb_shinfo(skb)->frags[fidx];
dma_unmap_page(dp->dev, tx_ring->txbufs[idx].dma_addr,
dma_unmap_page(dp->dev, tx_buf->dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
}
@ -986,9 +1002,9 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
if (fidx == nr_frags - 1)
napi_consume_skb(skb, budget);
tx_ring->txbufs[idx].dma_addr = 0;
tx_ring->txbufs[idx].skb = NULL;
tx_ring->txbufs[idx].fidx = -2;
tx_buf->dma_addr = 0;
tx_buf->skb = NULL;
tx_buf->fidx = -2;
}
tx_ring->qcp_rd_p = qcp_rd_p;
@ -3273,7 +3289,10 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
hdrlen = skb_inner_transport_header(skb) - skb->data +
inner_tcp_hdrlen(skb);
if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ))
/* Assume worst case scenario of having longest possible
* metadata prepend - 8B
*/
if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ - 8))
features &= ~NETIF_F_GSO_MASK;
}

View File

@ -41,8 +41,8 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
data += 4;
if (length % NFP_NET_CFG_TLV_LENGTH_INC) {
dev_err(dev, "TLV size not multiple of %u len:%u\n",
NFP_NET_CFG_TLV_LENGTH_INC, length);
dev_err(dev, "TLV size not multiple of %u offset:%u len:%u\n",
NFP_NET_CFG_TLV_LENGTH_INC, offset, length);
return -EINVAL;
}
if (data + length > end) {
@ -61,14 +61,14 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
if (!length)
return 0;
dev_err(dev, "END TLV should be empty, has len:%d\n",
length);
dev_err(dev, "END TLV should be empty, has offset:%u len:%d\n",
offset, length);
return -EINVAL;
case NFP_NET_CFG_TLV_TYPE_ME_FREQ:
if (length != 4) {
dev_err(dev,
"ME FREQ TLV should be 4B, is %dB\n",
length);
"ME FREQ TLV should be 4B, is %dB offset:%u\n",
length, offset);
return -EINVAL;
}
@ -90,6 +90,15 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
FIELD_GET(NFP_NET_CFG_TLV_HEADER_TYPE, hdr),
offset, length);
break;
case NFP_NET_CFG_TLV_TYPE_REPR_CAP:
if (length < 4) {
dev_err(dev, "REPR CAP TLV short %dB < 4B offset:%u\n",
length, offset);
return -EINVAL;
}
caps->repr_cap = readl(data);
break;
default:
if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr))
break;

View File

@ -466,6 +466,10 @@
* Variable, experimental IDs. IDs designated for internal development and
* experiments before a stable TLV ID has been allocated to a feature. Should
* never be present in production firmware.
*
* %NFP_NET_CFG_TLV_TYPE_REPR_CAP:
* Single word, equivalent of %NFP_NET_CFG_CAP for representors, features which
* can be used on representors.
*/
#define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0
#define NFP_NET_CFG_TLV_TYPE_RESERVED 1
@ -474,6 +478,7 @@
#define NFP_NET_CFG_TLV_TYPE_MBOX 4
#define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0 5
#define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1 6
#define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7
struct device;
@ -482,11 +487,13 @@ struct device;
* @me_freq_mhz: ME clock_freq (MHz)
* @mbox_off: vNIC mailbox area offset
* @mbox_len: vNIC mailbox area length
* @repr_cap: capabilities for representors
*/
struct nfp_net_tlv_caps {
u32 me_freq_mhz;
unsigned int mbox_off;
unsigned int mbox_len;
u32 repr_cap;
};
int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,

View File

@ -11,6 +11,7 @@
#include "nfpcore/nfp_nsp.h"
#include "nfp_app.h"
#include "nfp_main.h"
#include "nfp_net.h"
#include "nfp_net_ctrl.h"
#include "nfp_net_repr.h"
#include "nfp_net_sriov.h"
@ -231,6 +232,27 @@ static int nfp_repr_open(struct net_device *netdev)
return err;
}
static netdev_features_t
nfp_repr_fix_features(struct net_device *netdev, netdev_features_t features)
{
struct nfp_repr *repr = netdev_priv(netdev);
netdev_features_t old_features = features;
netdev_features_t lower_features;
struct net_device *lower_dev;
lower_dev = repr->dst->u.port_info.lower_dev;
lower_features = lower_dev->features;
if (lower_features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
lower_features |= NETIF_F_HW_CSUM;
features = netdev_intersect_features(features, lower_features);
features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_HW_TC);
features |= NETIF_F_LLTX;
return features;
}
const struct net_device_ops nfp_repr_netdev_ops = {
.ndo_init = nfp_app_ndo_init,
.ndo_uninit = nfp_app_ndo_uninit,
@ -248,10 +270,25 @@ const struct net_device_ops nfp_repr_netdev_ops = {
.ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk,
.ndo_get_vf_config = nfp_app_get_vf_config,
.ndo_set_vf_link_state = nfp_app_set_vf_link_state,
.ndo_fix_features = nfp_repr_fix_features,
.ndo_set_features = nfp_port_set_features,
.ndo_set_mac_address = eth_mac_addr,
};
void
nfp_repr_transfer_features(struct net_device *netdev, struct net_device *lower)
{
struct nfp_repr *repr = netdev_priv(netdev);
if (repr->dst->u.port_info.lower_dev != lower)
return;
netdev->gso_max_size = lower->gso_max_size;
netdev->gso_max_segs = lower->gso_max_segs;
netdev_update_features(netdev);
}
static void nfp_repr_clean(struct nfp_repr *repr)
{
unregister_netdev(repr->netdev);
@ -281,6 +318,8 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
struct net_device *pf_netdev)
{
struct nfp_repr *repr = netdev_priv(netdev);
struct nfp_net *nn = netdev_priv(pf_netdev);
u32 repr_cap = nn->tlv_caps.repr_cap;
int err;
nfp_repr_set_lockdep_class(netdev);
@ -299,6 +338,55 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
/* Set features the lower device can support with representors */
if (repr_cap & NFP_NET_CFG_CTRL_LIVE_ADDR)
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netdev->hw_features = NETIF_F_HIGHDMA;
if (repr_cap & NFP_NET_CFG_CTRL_RXCSUM_ANY)
netdev->hw_features |= NETIF_F_RXCSUM;
if (repr_cap & NFP_NET_CFG_CTRL_TXCSUM)
netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
if (repr_cap & NFP_NET_CFG_CTRL_GATHER)
netdev->hw_features |= NETIF_F_SG;
if ((repr_cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) ||
repr_cap & NFP_NET_CFG_CTRL_LSO2)
netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
if (repr_cap & NFP_NET_CFG_CTRL_RSS_ANY)
netdev->hw_features |= NETIF_F_RXHASH;
if (repr_cap & NFP_NET_CFG_CTRL_VXLAN) {
if (repr_cap & NFP_NET_CFG_CTRL_LSO)
netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
}
if (repr_cap & NFP_NET_CFG_CTRL_NVGRE) {
if (repr_cap & NFP_NET_CFG_CTRL_LSO)
netdev->hw_features |= NETIF_F_GSO_GRE;
}
if (repr_cap & (NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE))
netdev->hw_enc_features = netdev->hw_features;
netdev->vlan_features = netdev->hw_features;
if (repr_cap & NFP_NET_CFG_CTRL_RXVLAN)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
if (repr_cap & NFP_NET_CFG_CTRL_TXVLAN) {
if (repr_cap & NFP_NET_CFG_CTRL_LSO2)
netdev_warn(netdev, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n");
else
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
}
if (repr_cap & NFP_NET_CFG_CTRL_CTAG_FILTER)
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
netdev->features = netdev->hw_features;
/* Advertise but disable TSO by default. */
netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS;
netdev->priv_flags |= IFF_NO_QUEUE;
netdev->features |= NETIF_F_LLTX;
if (nfp_app_has_tc(app)) {
netdev->features |= NETIF_F_HW_TC;
netdev->hw_features |= NETIF_F_HW_TC;
@ -442,7 +530,9 @@ int nfp_reprs_resync_phys_ports(struct nfp_app *app)
continue;
nfp_app_repr_preclean(app, netdev);
rtnl_lock();
rcu_assign_pointer(reprs->reprs[i], NULL);
rtnl_unlock();
synchronize_rcu();
nfp_repr_clean(repr);
}

View File

@ -92,6 +92,8 @@ nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set,
unsigned int id);
void nfp_repr_inc_rx_stats(struct net_device *netdev, unsigned int len);
void
nfp_repr_transfer_features(struct net_device *netdev, struct net_device *lower);
int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
u32 cmsg_port_id, struct nfp_port *port,
struct net_device *pf_netdev);