Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue

Jeff Kirsher says:

====================
100GbE Intel Wired LAN Driver Updates 2019-11-04

This series contains updates to the ice driver only.

Anirudh refactors the code to reduce the kernel configuration flags and
introduces ice_base.c file.

Maciej does additional refactoring on the configuring of transmit
rings so that we are not configuring per each traffic class flow.
Added support for XDP in the ice driver.  Provides additional
re-organizing of the code in preparation for adding build_skb() support
in the driver.  Adjusted the computational padding logic for headroom
and tailroom to better support build_skb(), which also aligns with the
logic in other Intel LAN drivers.  Added build_skb support and make use
of the XDP's data_meta.

Krzysztof refactors the driver to prepare for AF_XDP support in the
driver and then adds support for AF_XDP.

v2: Updated patch 3 of the series based on community feedback with the
    following changes...
    - return -EOPNOTSUPP instead of ENOTSUPP for too large MTU which makes
      it impossible to attach XDP prog
    - don't check for case when there's no XDP prog currently on interface
      and ice_xdp() is called with NULL bpf_prog; this happens when user
      does "ip link set eth0 xdp off" and no prog is present on VSI; no need
      for that as it is handled by higher layer
    - drop the extack message for unknown xdp->command
    - use the smp_processor_id() for accessing the XDP Tx ring for XDP_TX
      action
    - don't leave the interface in downed state in case of any failure
      during the XDP Tx resources handling
    - undo rename of ice_build_ctob
    The above changes caused a ripple effect in patches 4 & 5 to update
    references to ice_build_ctob() which are now build_ctob()
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-11-05 13:40:12 -08:00
commit 39069faac2
16 changed files with 3553 additions and 1150 deletions

View File

@ -13,9 +13,12 @@ ice-y := ice_main.o \
ice_nvm.o \
ice_switch.o \
ice_sched.o \
ice_base.o \
ice_lib.o \
ice_txrx_lib.o \
ice_txrx.o \
ice_flex_pipe.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o

View File

@ -29,10 +29,13 @@
#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/ipv6.h>
#include <linux/pkt_sched.h>
#include <linux/if_bridge.h>
#include <linux/ctype.h>
#include <linux/bpf.h>
#include <linux/avf/virtchnl.h>
#include <net/ipv6.h>
#include <net/xdp_sock.h>
#include "ice_devids.h"
#include "ice_type.h"
#include "ice_txrx.h"
@ -42,6 +45,7 @@
#include "ice_sched.h"
#include "ice_virtchnl_pf.h"
#include "ice_sriov.h"
#include "ice_xsk.h"
extern const char ice_drv_ver[];
#define ICE_BAR0 0
@ -78,8 +82,7 @@ extern const char ice_drv_ver[];
#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - \
(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)))
#define ICE_MAX_MTU (ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
#define ICE_UP_TABLE_TRANSLATE(val, i) \
(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
@ -127,6 +130,14 @@ extern const char ice_drv_ver[];
ICE_PROMISC_VLAN_TX | \
ICE_PROMISC_VLAN_RX)
struct ice_txq_meta {
u32 q_teid; /* Tx-scheduler element identifier */
u16 q_id; /* Entry in VSI's txq_map bitmap */
u16 q_handle; /* Relative index of Tx queue within TC */
u16 vsi_idx; /* VSI index that Tx queue belongs to */
u8 tc; /* TC number that Tx queue belongs to */
};
struct ice_tc_info {
u16 qoffset;
u16 qcount_tx;
@ -274,6 +285,13 @@ struct ice_vsi {
u16 num_rx_desc;
u16 num_tx_desc;
struct ice_tc_cfg tc_cfg;
struct bpf_prog *xdp_prog;
struct ice_ring **xdp_rings; /* XDP ring array */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
struct xdp_umem **xsk_umems;
u16 num_xsk_umems_used;
u16 num_xsk_umems;
} ____cacheline_internodealigned_in_smp;
/* struct that defines an interrupt vector */
@ -313,6 +331,7 @@ enum ice_pf_flags {
ICE_FLAG_NO_MEDIA,
ICE_FLAG_FW_LLDP_AGENT,
ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
ICE_FLAG_LEGACY_RX,
ICE_PF_FLAGS_NBITS /* must be last */
};
@ -417,6 +436,37 @@ static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
return np->vsi->back;
}
static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
{
return !!vsi->xdp_prog;
}
static inline void ice_set_ring_xdp(struct ice_ring *ring)
{
ring->flags |= ICE_TX_FLAGS_RING_XDP;
}
/**
* ice_xsk_umem - get XDP UMEM bound to a ring
* @ring - ring to use
*
* Returns a pointer to xdp_umem structure if there is an UMEM present,
* NULL otherwise.
*/
static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
{
struct xdp_umem **umems = ring->vsi->xsk_umems;
int qid = ring->q_index;
if (ice_ring_is_xdp(ring))
qid -= ring->vsi->num_xdp_txq;
if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
return NULL;
return umems[qid];
}
/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
@ -443,6 +493,11 @@ int ice_up(struct ice_vsi *vsi);
int ice_down(struct ice_vsi *vsi);
int ice_vsi_cfg(struct ice_vsi *vsi);
struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
int ice_destroy_xdp_rings(struct ice_vsi *vsi);
int
ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);

View File

@ -0,0 +1,857 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019, Intel Corporation. */
#include "ice_base.h"
#include "ice_dcb_lib.h"
/**
* __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
* @qs_cfg: gathered variables needed for PF->VSI queues assignment
*
* Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
*/
static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
{
int offset, i;
mutex_lock(qs_cfg->qs_mutex);
offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
0, qs_cfg->q_count, 0);
if (offset >= qs_cfg->pf_map_size) {
mutex_unlock(qs_cfg->qs_mutex);
return -ENOMEM;
}
bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
for (i = 0; i < qs_cfg->q_count; i++)
qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
mutex_unlock(qs_cfg->qs_mutex);
return 0;
}
/**
* __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
* @qs_cfg: gathered variables needed for pf->vsi queues assignment
*
* Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
*/
static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
{
int i, index = 0;
mutex_lock(qs_cfg->qs_mutex);
for (i = 0; i < qs_cfg->q_count; i++) {
index = find_next_zero_bit(qs_cfg->pf_map,
qs_cfg->pf_map_size, index);
if (index >= qs_cfg->pf_map_size)
goto err_scatter;
set_bit(index, qs_cfg->pf_map);
qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
}
mutex_unlock(qs_cfg->qs_mutex);
return 0;
err_scatter:
for (index = 0; index < i; index++) {
clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
}
mutex_unlock(qs_cfg->qs_mutex);
return -ENOMEM;
}
/**
* ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
* @pf: the PF being configured
* @pf_q: the PF queue
* @ena: enable or disable state of the queue
*
* This routine will wait for the given Rx queue of the PF to reach the
* enabled or disabled state.
* Returns -ETIMEDOUT in case of failing to reach the requested state after
* multiple retries; else will return 0 in case of success.
*/
static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
{
int i;
for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
QRX_CTRL_QENA_STAT_M))
return 0;
usleep_range(20, 40);
}
return -ETIMEDOUT;
}
/**
* ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
* @vsi: the VSI being configured
* @v_idx: index of the vector in the VSI struct
*
* We allocate one q_vector. If allocation fails we return -ENOMEM.
*/
static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
{
struct ice_pf *pf = vsi->back;
struct ice_q_vector *q_vector;
/* allocate q_vector */
q_vector = devm_kzalloc(&pf->pdev->dev, sizeof(*q_vector), GFP_KERNEL);
if (!q_vector)
return -ENOMEM;
q_vector->vsi = vsi;
q_vector->v_idx = v_idx;
if (vsi->type == ICE_VSI_VF)
goto out;
/* only set affinity_mask if the CPU is online */
if (cpu_online(v_idx))
cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
/* This will not be called in the driver load path because the netdev
* will not be created yet. All other cases with register the NAPI
* handler here (i.e. resume, reset/rebuild, etc.)
*/
if (vsi->netdev)
netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll,
NAPI_POLL_WEIGHT);
out:
/* tie q_vector and VSI together */
vsi->q_vectors[v_idx] = q_vector;
return 0;
}
/**
* ice_free_q_vector - Free memory allocated for a specific interrupt vector
* @vsi: VSI having the memory freed
* @v_idx: index of the vector to be freed
*/
static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
{
struct ice_q_vector *q_vector;
struct ice_pf *pf = vsi->back;
struct ice_ring *ring;
if (!vsi->q_vectors[v_idx]) {
dev_dbg(&pf->pdev->dev, "Queue vector at index %d not found\n",
v_idx);
return;
}
q_vector = vsi->q_vectors[v_idx];
ice_for_each_ring(ring, q_vector->tx)
ring->q_vector = NULL;
ice_for_each_ring(ring, q_vector->rx)
ring->q_vector = NULL;
/* only VSI with an associated netdev is set up with NAPI */
if (vsi->netdev)
netif_napi_del(&q_vector->napi);
devm_kfree(&pf->pdev->dev, q_vector);
vsi->q_vectors[v_idx] = NULL;
}
/**
* ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
* @hw: board specific structure
*/
static void ice_cfg_itr_gran(struct ice_hw *hw)
{
u32 regval = rd32(hw, GLINT_CTL);
/* no need to update global register if ITR gran is already set */
if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
(((regval & GLINT_CTL_ITR_GRAN_200_M) >>
GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
(((regval & GLINT_CTL_ITR_GRAN_100_M) >>
GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
(((regval & GLINT_CTL_ITR_GRAN_50_M) >>
GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
(((regval & GLINT_CTL_ITR_GRAN_25_M) >>
GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
return;
regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
GLINT_CTL_ITR_GRAN_200_M) |
((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
GLINT_CTL_ITR_GRAN_100_M) |
((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
GLINT_CTL_ITR_GRAN_50_M) |
((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
GLINT_CTL_ITR_GRAN_25_M);
wr32(hw, GLINT_CTL, regval);
}
/**
* ice_calc_q_handle - calculate the queue handle
* @vsi: VSI that ring belongs to
* @ring: ring to get the absolute queue index
* @tc: traffic class number
*/
static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc)
{
WARN_ONCE(ice_ring_is_xdp(ring) && tc,
"XDP ring can't belong to TC other than 0");
/* Idea here for calculation is that we subtract the number of queue
* count from TC that ring belongs to from it's absolute queue index
* and as a result we get the queue's index within TC.
*/
return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
}
/**
* ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
* @ring: The Tx ring to configure
* @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
* @pf_q: queue index in the PF space
*
* Configure the Tx descriptor ring in TLAN context.
*/
static void
ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
{
struct ice_vsi *vsi = ring->vsi;
struct ice_hw *hw = &vsi->back->hw;
tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
tlan_ctx->port_num = vsi->port_info->lport;
/* Transmit Queue Length */
tlan_ctx->qlen = ring->count;
ice_set_cgd_num(tlan_ctx, ring);
/* PF number */
tlan_ctx->pf_num = hw->pf_id;
/* queue belongs to a specific VSI type
* VF / VM index should be programmed per vmvf_type setting:
* for vmvf_type = VF, it is VF number between 0-256
* for vmvf_type = VM, it is VM number between 0-767
* for PF or EMP this field should be set to zero
*/
switch (vsi->type) {
case ICE_VSI_LB:
/* fall through */
case ICE_VSI_PF:
tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
break;
case ICE_VSI_VF:
/* Firmware expects vmvf_num to be absolute VF ID */
tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id;
tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
break;
default:
return;
}
/* make sure the context is associated with the right VSI */
tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
tlan_ctx->tso_ena = ICE_TX_LEGACY;
tlan_ctx->tso_qnum = pf_q;
/* Legacy or Advanced Host Interface:
* 0: Advanced Host Interface
* 1: Legacy Host Interface
*/
tlan_ctx->legacy_int = ICE_TX_LEGACY;
}
/**
* ice_setup_rx_ctx - Configure a receive ring context
* @ring: The Rx ring to configure
*
* Configure the Rx descriptor ring in RLAN context.
*/
int ice_setup_rx_ctx(struct ice_ring *ring)
{
int chain_len = ICE_MAX_CHAINED_RX_BUFS;
struct ice_vsi *vsi = ring->vsi;
u32 rxdid = ICE_RXDID_FLEX_NIC;
struct ice_rlan_ctx rlan_ctx;
struct ice_hw *hw;
u32 regval;
u16 pf_q;
int err;
hw = &vsi->back->hw;
/* what is Rx queue number in global space of 2K Rx queues */
pf_q = vsi->rxq_map[ring->q_index];
/* clear the context structure first */
memset(&rlan_ctx, 0, sizeof(rlan_ctx));
ring->rx_buf_len = vsi->rx_buf_len;
if (ring->vsi->type == ICE_VSI_PF) {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index);
ring->xsk_umem = ice_xsk_umem(ring);
if (ring->xsk_umem) {
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
XDP_PACKET_HEADROOM;
/* For AF_XDP ZC, we disallow packets to span on
* multiple buffers, thus letting us skip that
* handling in the fast-path.
*/
chain_len = 1;
ring->zca.free = ice_zca_free;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_ZERO_COPY,
&ring->zca);
if (err)
return err;
dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
ring->q_index);
} else {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
xdp_rxq_info_reg(&ring->xdp_rxq,
ring->netdev,
ring->q_index);
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL);
if (err)
return err;
}
}
/* Receive Queue Base Address.
* Indicates the starting address of the descriptor queue defined in
* 128 Byte units.
*/
rlan_ctx.base = ring->dma >> 7;
rlan_ctx.qlen = ring->count;
/* Receive Packet Data Buffer Size.
* The Packet Data Buffer Size is defined in 128 byte units.
*/
rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
/* use 32 byte descriptors */
rlan_ctx.dsize = 1;
/* Strip the Ethernet CRC bytes before the packet is posted to host
* memory.
*/
rlan_ctx.crcstrip = 1;
/* L2TSEL flag defines the reported L2 Tags in the receive descriptor */
rlan_ctx.l2tsel = 1;
rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
/* This controls whether VLAN is stripped from inner headers
* The VLAN in the inner L2 header is stripped to the receive
* descriptor if enabled by this flag.
*/
rlan_ctx.showiv = 0;
/* Max packet size for this queue - must not be set to a larger value
* than 5 x DBUF
*/
rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
chain_len * ring->rx_buf_len);
/* Rx queue threshold in units of 64 */
rlan_ctx.lrxqthresh = 1;
/* Enable Flexible Descriptors in the queue context which
* allows this driver to select a specific receive descriptor format
*/
if (vsi->type != ICE_VSI_VF) {
regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
QRXFLXP_CNTXT_RXDID_IDX_M;
/* increasing context priority to pick up profile ID;
* default is 0x01; setting to 0x03 to ensure profile
* is programming if prev context is of same priority
*/
regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
QRXFLXP_CNTXT_RXDID_PRIO_M;
wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
}
/* Absolute queue number out of 2K needs to be passed */
err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
if (err) {
dev_err(&vsi->back->pdev->dev,
"Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
pf_q, err);
return -EIO;
}
if (vsi->type == ICE_VSI_VF)
return 0;
/* configure Rx buffer alignment */
if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
ice_clear_ring_build_skb_ena(ring);
else
ice_set_ring_build_skb_ena(ring);
/* init queue specific tail register */
ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
writel(0, ring->tail);
err = ring->xsk_umem ?
ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
if (err)
dev_info(&vsi->back->pdev->dev,
"Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
ring->xsk_umem ? "UMEM enabled " : "",
ring->q_index, pf_q);
return 0;
}
/**
* __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
* @qs_cfg: gathered variables needed for pf->vsi queues assignment
*
* This function first tries to find contiguous space. If it is not successful,
* it tries with the scatter approach.
*
* Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
*/
int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
{
int ret = 0;
ret = __ice_vsi_get_qs_contig(qs_cfg);
if (ret) {
/* contig failed, so try with scatter approach */
qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
qs_cfg->scatter_count);
ret = __ice_vsi_get_qs_sc(qs_cfg);
}
return ret;
}
/**
* ice_vsi_ctrl_rx_ring - Start or stop a VSI's Rx ring
* @vsi: the VSI being configured
* @ena: start or stop the Rx rings
* @rxq_idx: Rx queue index
*/
int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
{
int pf_q = vsi->rxq_map[rxq_idx];
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
int ret = 0;
u32 rx_reg;
rx_reg = rd32(hw, QRX_CTRL(pf_q));
/* Skip if the queue is already in the requested state */
if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
return 0;
/* turn on/off the queue */
if (ena)
rx_reg |= QRX_CTRL_QENA_REQ_M;
else
rx_reg &= ~QRX_CTRL_QENA_REQ_M;
wr32(hw, QRX_CTRL(pf_q), rx_reg);
/* wait for the change to finish */
ret = ice_pf_rxq_wait(pf, pf_q, ena);
if (ret)
dev_err(&pf->pdev->dev,
"VSI idx %d Rx ring %d %sable timeout\n",
vsi->idx, pf_q, (ena ? "en" : "dis"));
return ret;
}
/**
* ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
* @vsi: the VSI being configured
*
* We allocate one q_vector per queue interrupt. If allocation fails we
* return -ENOMEM.
*/
int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
int v_idx = 0, num_q_vectors;
int err;
if (vsi->q_vectors[0]) {
dev_dbg(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
vsi->vsi_num);
return -EEXIST;
}
num_q_vectors = vsi->num_q_vectors;
for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
err = ice_vsi_alloc_q_vector(vsi, v_idx);
if (err)
goto err_out;
}
return 0;
err_out:
while (v_idx--)
ice_free_q_vector(vsi, v_idx);
dev_err(&pf->pdev->dev,
"Failed to allocate %d q_vector for VSI %d, ret=%d\n",
vsi->num_q_vectors, vsi->vsi_num, err);
vsi->num_q_vectors = 0;
return err;
}
/**
* ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
* @vsi: the VSI being configured
*
* This function maps descriptor rings to the queue-specific vectors allotted
* through the MSI-X enabling code. On a constrained vector budget, we map Tx
* and Rx rings to the vector as "efficiently" as possible.
*/
void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
{
int q_vectors = vsi->num_q_vectors;
int tx_rings_rem, rx_rings_rem;
int v_id;
/* initially assigning remaining rings count to VSIs num queue value */
tx_rings_rem = vsi->num_txq;
rx_rings_rem = vsi->num_rxq;
for (v_id = 0; v_id < q_vectors; v_id++) {
struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
/* Tx rings mapping to vector */
tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
q_vector->num_ring_tx = tx_rings_per_v;
q_vector->tx.ring = NULL;
q_vector->tx.itr_idx = ICE_TX_ITR;
q_base = vsi->num_txq - tx_rings_rem;
for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
struct ice_ring *tx_ring = vsi->tx_rings[q_id];
tx_ring->q_vector = q_vector;
tx_ring->next = q_vector->tx.ring;
q_vector->tx.ring = tx_ring;
}
tx_rings_rem -= tx_rings_per_v;
/* Rx rings mapping to vector */
rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
q_vector->num_ring_rx = rx_rings_per_v;
q_vector->rx.ring = NULL;
q_vector->rx.itr_idx = ICE_RX_ITR;
q_base = vsi->num_rxq - rx_rings_rem;
for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
struct ice_ring *rx_ring = vsi->rx_rings[q_id];
rx_ring->q_vector = q_vector;
rx_ring->next = q_vector->rx.ring;
q_vector->rx.ring = rx_ring;
}
rx_rings_rem -= rx_rings_per_v;
}
}
/**
* ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
* @vsi: the VSI having memory freed
*/
void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
{
int v_idx;
ice_for_each_q_vector(vsi, v_idx)
ice_free_q_vector(vsi, v_idx);
}
/**
* ice_vsi_cfg_txq - Configure single Tx queue
* @vsi: the VSI that queue belongs to
* @ring: Tx ring to be configured
* @qg_buf: queue group buffer
*/
int
ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
struct ice_aqc_add_tx_qgrp *qg_buf)
{
struct ice_tlan_ctx tlan_ctx = { 0 };
struct ice_aqc_add_txqs_perq *txq;
struct ice_pf *pf = vsi->back;
u8 buf_len = sizeof(*qg_buf);
enum ice_status status;
u16 pf_q;
u8 tc;
pf_q = ring->reg_idx;
ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
/* copy context contents into the qg_buf */
qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
ice_tlan_ctx_info);
/* init queue specific tail reg. It is referred as
* transmit comm scheduler queue doorbell.
*/
ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
if (IS_ENABLED(CONFIG_DCB))
tc = ring->dcb_tc;
else
tc = 0;
/* Add unique software queue handle of the Tx queue per
* TC into the VSI Tx ring
*/
ring->q_handle = ice_calc_q_handle(vsi, ring, tc);
status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
1, qg_buf, buf_len, NULL);
if (status) {
dev_err(&pf->pdev->dev,
"Failed to set LAN Tx queue context, error: %d\n",
status);
return -ENODEV;
}
/* Add Tx Queue TEID into the VSI Tx ring from the
* response. This will complete configuring and
* enabling the queue.
*/
txq = &qg_buf->txqs[0];
if (pf_q == le16_to_cpu(txq->txq_id))
ring->txq_teid = le32_to_cpu(txq->q_teid);
return 0;
}
/**
* ice_cfg_itr - configure the initial interrupt throttle values
* @hw: pointer to the HW structure
* @q_vector: interrupt vector that's being configured
*
* Configure interrupt throttling values for the ring containers that are
* associated with the interrupt vector passed in.
*/
void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
{
ice_cfg_itr_gran(hw);
if (q_vector->num_ring_rx) {
struct ice_ring_container *rc = &q_vector->rx;
/* if this value is set then don't overwrite with default */
if (!rc->itr_setting)
rc->itr_setting = ICE_DFLT_RX_ITR;
rc->target_itr = ITR_TO_REG(rc->itr_setting);
rc->next_update = jiffies + 1;
rc->current_itr = rc->target_itr;
wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
}
if (q_vector->num_ring_tx) {
struct ice_ring_container *rc = &q_vector->tx;
/* if this value is set then don't overwrite with default */
if (!rc->itr_setting)
rc->itr_setting = ICE_DFLT_TX_ITR;
rc->target_itr = ITR_TO_REG(rc->itr_setting);
rc->next_update = jiffies + 1;
rc->current_itr = rc->target_itr;
wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S);
}
}
/**
* ice_cfg_txq_interrupt - configure interrupt on Tx queue
* @vsi: the VSI being configured
* @txq: Tx queue being mapped to MSI-X vector
* @msix_idx: MSI-X vector index within the function
* @itr_idx: ITR index of the interrupt cause
*
* Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
* within the function space.
*/
void
ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
{
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
u32 val;
itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
if (ice_is_xdp_ena_vsi(vsi)) {
u32 xdp_txq = txq + vsi->num_xdp_txq;
wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
val);
}
ice_flush(hw);
}
/**
* ice_cfg_rxq_interrupt - configure interrupt on Rx queue
* @vsi: the VSI being configured
* @rxq: Rx queue being mapped to MSI-X vector
* @msix_idx: MSI-X vector index within the function
* @itr_idx: ITR index of the interrupt cause
*
* Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
* within the function space.
*/
void
ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
{
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
u32 val;
itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
ice_flush(hw);
}
/**
* ice_trigger_sw_intr - trigger a software interrupt
* @hw: pointer to the HW structure
* @q_vector: interrupt vector to trigger the software interrupt for
*/
void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
{
wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
(ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
GLINT_DYN_CTL_SWINT_TRIG_M |
GLINT_DYN_CTL_INTENA_M);
}
/**
* ice_vsi_stop_tx_ring - Disable single Tx ring
* @vsi: the VSI being configured
* @rst_src: reset source
* @rel_vmvf_num: Relative ID of VF/VM
* @ring: Tx ring to be stopped
* @txq_meta: Meta data of Tx ring to be stopped
*/
int
ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num, struct ice_ring *ring,
struct ice_txq_meta *txq_meta)
{
struct ice_pf *pf = vsi->back;
struct ice_q_vector *q_vector;
struct ice_hw *hw = &pf->hw;
enum ice_status status;
u32 val;
/* clear cause_ena bit for disabled queues */
val = rd32(hw, QINT_TQCTL(ring->reg_idx));
val &= ~QINT_TQCTL_CAUSE_ENA_M;
wr32(hw, QINT_TQCTL(ring->reg_idx), val);
/* software is expected to wait for 100 ns */
ndelay(100);
/* trigger a software interrupt for the vector
* associated to the queue to schedule NAPI handler
*/
q_vector = ring->q_vector;
if (q_vector)
ice_trigger_sw_intr(hw, q_vector);
status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
txq_meta->tc, 1, &txq_meta->q_handle,
&txq_meta->q_id, &txq_meta->q_teid, rst_src,
rel_vmvf_num, NULL);
/* if the disable queue command was exercised during an
* active reset flow, ICE_ERR_RESET_ONGOING is returned.
* This is not an error as the reset operation disables
* queues at the hardware level anyway.
*/
if (status == ICE_ERR_RESET_ONGOING) {
dev_dbg(&vsi->back->pdev->dev,
"Reset in progress. LAN Tx queues already disabled\n");
} else if (status == ICE_ERR_DOES_NOT_EXIST) {
dev_dbg(&vsi->back->pdev->dev,
"LAN Tx queues do not exist, nothing to disable\n");
} else if (status) {
dev_err(&vsi->back->pdev->dev,
"Failed to disable LAN Tx queues, error: %d\n", status);
return -ENODEV;
}
return 0;
}
/**
* ice_fill_txq_meta - Prepare the Tx queue's meta data
* @vsi: VSI that ring belongs to
* @ring: ring that txq_meta will be based on
* @txq_meta: a helper struct that wraps Tx queue's information
*
* Set up a helper struct that will contain all the necessary fields that
* are needed for stopping Tx queue
*/
void
ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
struct ice_txq_meta *txq_meta)
{
u8 tc;
if (IS_ENABLED(CONFIG_DCB))
tc = ring->dcb_tc;
else
tc = 0;
txq_meta->q_id = ring->reg_idx;
txq_meta->q_teid = ring->txq_teid;
txq_meta->q_handle = ring->q_handle;
txq_meta->vsi_idx = vsi->idx;
txq_meta->tc = tc;
}

View File

@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019, Intel Corporation. */
#ifndef _ICE_BASE_H_
#define _ICE_BASE_H_
#include "ice.h"
int ice_setup_rx_ctx(struct ice_ring *ring);
int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
int
ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
struct ice_aqc_add_tx_qgrp *qg_buf);
void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
void
ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
void
ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
int
ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num, struct ice_ring *ring,
struct ice_txq_meta *txq_meta);
void
ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
struct ice_txq_meta *txq_meta);
#endif /* _ICE_BASE_H_ */

View File

@ -5,6 +5,7 @@
#define _ICE_DCB_LIB_H_
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
#ifdef CONFIG_DCB

View File

@ -156,6 +156,7 @@ struct ice_priv_flag {
static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
@ -623,7 +624,7 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
continue;
rx_buf = &rx_ring->rx_buf[i];
received_buf = page_address(rx_buf->page);
received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
if (ice_lbtest_check_frame(received_buf))
valid_frames++;
@ -1256,6 +1257,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
"Fail to enable MIB change events\n");
}
}
if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
/* down and up VSI so that changes of Rx cfg are reflected. */
ice_down(vsi);
ice_up(vsi);
}
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
return ret;
}
@ -2577,6 +2583,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
{
struct ice_ring *tx_rings = NULL, *rx_rings = NULL;
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_ring *xdp_rings = NULL;
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
int i, timeout = 50, err = 0;
@ -2611,6 +2618,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
return 0;
}
/* If there is a AF_XDP UMEM attached to any of Rx rings,
* disallow changing the number of descriptors -- regardless
* if the netdev is running or not.
*/
if (ice_xsk_any_rx_ring_ena(vsi))
return -EBUSY;
while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
if (!timeout)
@ -2624,6 +2638,11 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
vsi->tx_rings[i]->count = new_tx_cnt;
for (i = 0; i < vsi->alloc_rxq; i++)
vsi->rx_rings[i]->count = new_rx_cnt;
if (ice_is_xdp_ena_vsi(vsi))
for (i = 0; i < vsi->num_xdp_txq; i++)
vsi->xdp_rings[i]->count = new_tx_cnt;
vsi->num_tx_desc = new_tx_cnt;
vsi->num_rx_desc = new_rx_cnt;
netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
goto done;
}
@ -2650,15 +2669,43 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
tx_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&tx_rings[i]);
if (err) {
while (i) {
i--;
while (i--)
ice_clean_tx_ring(&tx_rings[i]);
}
devm_kfree(&pf->pdev->dev, tx_rings);
goto done;
}
}
if (!ice_is_xdp_ena_vsi(vsi))
goto process_rx;
/* alloc updated XDP resources */
netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
vsi->xdp_rings[0]->count, new_tx_cnt);
xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
sizeof(*xdp_rings), GFP_KERNEL);
if (!xdp_rings) {
err = -ENOMEM;
goto free_tx;
}
for (i = 0; i < vsi->num_xdp_txq; i++) {
/* clone ring and setup updated count */
xdp_rings[i] = *vsi->xdp_rings[i];
xdp_rings[i].count = new_tx_cnt;
xdp_rings[i].desc = NULL;
xdp_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&xdp_rings[i]);
if (err) {
while (i--)
ice_clean_tx_ring(&xdp_rings[i]);
devm_kfree(&pf->pdev->dev, xdp_rings);
goto free_tx;
}
ice_set_ring_xdp(&xdp_rings[i]);
}
process_rx:
if (new_rx_cnt == vsi->rx_rings[0]->count)
goto process_link;
@ -2737,6 +2784,16 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
devm_kfree(&pf->pdev->dev, rx_rings);
}
if (xdp_rings) {
for (i = 0; i < vsi->num_xdp_txq; i++) {
ice_free_tx_ring(vsi->xdp_rings[i]);
*vsi->xdp_rings[i] = xdp_rings[i];
}
devm_kfree(&pf->pdev->dev, xdp_rings);
}
vsi->num_tx_desc = new_tx_cnt;
vsi->num_rx_desc = new_rx_cnt;
ice_up(vsi);
}
goto done;

File diff suppressed because it is too large Load Diff

View File

@ -6,19 +6,6 @@
#include "ice.h"
struct ice_txq_meta {
/* Tx-scheduler element identifier */
u32 q_teid;
/* Entry in VSI's txq_map bitmap */
u16 q_id;
/* Relative index of Tx queue within TC */
u16 q_handle;
/* VSI index that Tx queue belongs to */
u16 vsi_idx;
/* TC number that Tx queue belongs to */
u8 tc;
};
int
ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
const u8 *macaddr);
@ -33,24 +20,6 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
void ice_vsi_cfg_msix(struct ice_vsi *vsi);
#ifdef CONFIG_PCI_IOV
void
ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
void
ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
int
ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num, struct ice_ring *ring,
struct ice_txq_meta *txq_meta);
void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring,
struct ice_txq_meta *txq_meta);
int ice_vsi_ctrl_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
#endif /* CONFIG_PCI_IOV */
int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@ -67,6 +36,10 @@ int
ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
u16 rel_vmvf_num);
int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
@ -98,16 +71,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi);
bool ice_is_reset_in_progress(unsigned long *state);
void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
void ice_vsi_put_qs(struct ice_vsi *vsi);
#ifdef CONFIG_DCB
void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
#endif /* CONFIG_DCB */
void ice_vsi_dis_irq(struct ice_vsi *vsi);
void ice_vsi_free_irq(struct ice_vsi *vsi);
@ -118,6 +83,12 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
char *ice_nvm_version_str(struct ice_hw *hw);

View File

@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
@ -1660,6 +1661,324 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
return err;
}
/**
* ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
* @vsi: VSI to setup Tx rings used by XDP
*
* Return 0 on success and negative value on error
*/
static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
{
struct device *dev = &vsi->back->pdev->dev;
int i;
for (i = 0; i < vsi->num_xdp_txq; i++) {
u16 xdp_q_idx = vsi->alloc_txq + i;
struct ice_ring *xdp_ring;
xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
if (!xdp_ring)
goto free_xdp_rings;
xdp_ring->q_index = xdp_q_idx;
xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
xdp_ring->ring_active = false;
xdp_ring->vsi = vsi;
xdp_ring->netdev = NULL;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
vsi->xdp_rings[i] = xdp_ring;
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
}
return 0;
free_xdp_rings:
for (; i >= 0; i--)
if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
ice_free_tx_ring(vsi->xdp_rings[i]);
return -ENOMEM;
}
/**
* ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
* @vsi: VSI to set the bpf prog on
* @prog: the bpf prog pointer
*/
static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
{
struct bpf_prog *old_prog;
int i;
old_prog = xchg(&vsi->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
ice_for_each_rxq(vsi, i)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
}
/**
* ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
* @vsi: VSI to bring up Tx rings used by XDP
* @prog: bpf program that will be assigned to VSI
*
* Return 0 on success and negative value on error
*/
int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
int xdp_rings_rem = vsi->num_xdp_txq;
struct ice_pf *pf = vsi->back;
struct ice_qs_cfg xdp_qs_cfg = {
.qs_mutex = &pf->avail_q_mutex,
.pf_map = pf->avail_txqs,
.pf_map_size = pf->max_pf_txqs,
.q_count = vsi->num_xdp_txq,
.scatter_count = ICE_MAX_SCATTER_TXQS,
.vsi_map = vsi->txq_map,
.vsi_map_offset = vsi->alloc_txq,
.mapping_mode = ICE_VSI_MAP_CONTIG
};
enum ice_status status;
int i, v_idx;
vsi->xdp_rings = devm_kcalloc(&pf->pdev->dev, vsi->num_xdp_txq,
sizeof(*vsi->xdp_rings), GFP_KERNEL);
if (!vsi->xdp_rings)
return -ENOMEM;
vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
if (__ice_vsi_get_qs(&xdp_qs_cfg))
goto err_map_xdp;
if (ice_xdp_alloc_setup_rings(vsi))
goto clear_xdp_rings;
/* follow the logic from ice_vsi_map_rings_to_vectors */
ice_for_each_q_vector(vsi, v_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
int xdp_rings_per_v, q_id, q_base;
xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
vsi->num_q_vectors - v_idx);
q_base = vsi->num_xdp_txq - xdp_rings_rem;
for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
struct ice_ring *xdp_ring = vsi->xdp_rings[q_id];
xdp_ring->q_vector = q_vector;
xdp_ring->next = q_vector->tx.ring;
q_vector->tx.ring = xdp_ring;
}
xdp_rings_rem -= xdp_rings_per_v;
}
/* omit the scheduler update if in reset path; XDP queues will be
* taken into account at the end of ice_vsi_rebuild, where
* ice_cfg_vsi_lan is being called
*/
if (ice_is_reset_in_progress(pf->state))
return 0;
/* tell the Tx scheduler that right now we have
* additional queues
*/
for (i = 0; i < vsi->tc_cfg.numtc; i++)
max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
if (status) {
dev_err(&pf->pdev->dev,
"Failed VSI LAN queue config for XDP, error:%d\n",
status);
goto clear_xdp_rings;
}
ice_vsi_assign_bpf_prog(vsi, prog);
return 0;
clear_xdp_rings:
for (i = 0; i < vsi->num_xdp_txq; i++)
if (vsi->xdp_rings[i]) {
kfree_rcu(vsi->xdp_rings[i], rcu);
vsi->xdp_rings[i] = NULL;
}
err_map_xdp:
mutex_lock(&pf->avail_q_mutex);
for (i = 0; i < vsi->num_xdp_txq; i++) {
clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
}
mutex_unlock(&pf->avail_q_mutex);
devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
return -ENOMEM;
}
/**
* ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
* @vsi: VSI to remove XDP rings
*
* Detach XDP rings from irq vectors, clean up the PF bitmap and free
* resources
*/
int ice_destroy_xdp_rings(struct ice_vsi *vsi)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct ice_pf *pf = vsi->back;
int i, v_idx;
/* q_vectors are freed in reset path so there's no point in detaching
* rings; in case of rebuild being triggered not from reset reset bits
* in pf->state won't be set, so additionally check first q_vector
* against NULL
*/
if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
goto free_qmap;
ice_for_each_q_vector(vsi, v_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
struct ice_ring *ring;
ice_for_each_ring(ring, q_vector->tx)
if (!ring->tx_buf || !ice_ring_is_xdp(ring))
break;
/* restore the value of last node prior to XDP setup */
q_vector->tx.ring = ring;
}
free_qmap:
mutex_lock(&pf->avail_q_mutex);
for (i = 0; i < vsi->num_xdp_txq; i++) {
clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
}
mutex_unlock(&pf->avail_q_mutex);
for (i = 0; i < vsi->num_xdp_txq; i++)
if (vsi->xdp_rings[i]) {
if (vsi->xdp_rings[i]->desc)
ice_free_tx_ring(vsi->xdp_rings[i]);
kfree_rcu(vsi->xdp_rings[i], rcu);
vsi->xdp_rings[i] = NULL;
}
devm_kfree(&pf->pdev->dev, vsi->xdp_rings);
vsi->xdp_rings = NULL;
if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
return 0;
ice_vsi_assign_bpf_prog(vsi, NULL);
/* notify Tx scheduler that we destroyed XDP queues and bring
* back the old number of child nodes
*/
for (i = 0; i < vsi->tc_cfg.numtc; i++)
max_txqs[i] = vsi->num_txq;
return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
max_txqs);
}
/**
* ice_xdp_setup_prog - Add or remove XDP eBPF program
* @vsi: VSI to setup XDP for
* @prog: XDP program
* @extack: netlink extended ack
*/
static int
ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
bool if_running = netif_running(vsi->netdev);
int ret = 0, xdp_ring_err = 0;
if (frame_size > vsi->rx_buf_len) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
return -EOPNOTSUPP;
}
/* need to stop netdev while setting up the program for Rx rings */
if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
ret = ice_down(vsi);
if (ret) {
NL_SET_ERR_MSG_MOD(extack,
"Preparing device for XDP attach failed");
return ret;
}
}
if (!ice_is_xdp_ena_vsi(vsi) && prog) {
vsi->num_xdp_txq = vsi->alloc_txq;
xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack,
"Setting up XDP Tx resources failed");
} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
xdp_ring_err = ice_destroy_xdp_rings(vsi);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack,
"Freeing XDP Tx resources failed");
} else {
ice_vsi_assign_bpf_prog(vsi, prog);
}
if (if_running)
ret = ice_up(vsi);
if (!ret && prog && vsi->xsk_umems) {
int i;
ice_for_each_rxq(vsi, i) {
struct ice_ring *rx_ring = vsi->rx_rings[i];
if (rx_ring->xsk_umem)
napi_schedule(&rx_ring->q_vector->napi);
}
}
return (ret || xdp_ring_err) ? -ENOMEM : 0;
}
/**
* ice_xdp - implements XDP handler
* @dev: netdevice
* @xdp: XDP command
*/
static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
struct ice_netdev_priv *np = netdev_priv(dev);
struct ice_vsi *vsi = np->vsi;
if (vsi->type != ICE_VSI_PF) {
NL_SET_ERR_MSG_MOD(xdp->extack,
"XDP can be loaded only on PF VSI");
return -EINVAL;
}
switch (xdp->command) {
case XDP_SETUP_PROG:
return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
case XDP_QUERY_PROG:
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
case XDP_SETUP_XSK_UMEM:
return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);
default:
return -EINVAL;
}
}
/**
* ice_ena_misc_vector - enable the non-queue interrupts
* @pf: board private structure
@ -2219,6 +2538,8 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
status = -ENODEV;
goto unroll_vsi_setup;
}
/* netdev has to be configured before setting frame size */
ice_vsi_cfg_frame_size(vsi);
/* registering the NAPI handler requires both the queues and
* netdev to be created, which are done in ice_pf_vsi_setup()
@ -3505,6 +3826,8 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
ice_vsi_cfg_dcb_rings(vsi);
err = ice_vsi_cfg_lan_txqs(vsi);
if (!err && ice_is_xdp_ena_vsi(vsi))
err = ice_vsi_cfg_xdp_txqs(vsi);
if (!err)
err = ice_vsi_cfg_rxqs(vsi);
@ -3920,6 +4243,13 @@ int ice_down(struct ice_vsi *vsi)
netdev_err(vsi->netdev,
"Failed stop Tx rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
if (tx_err)
netdev_err(vsi->netdev,
"Failed stop XDP rings, VSI %d error %d\n",
vsi->vsi_num, tx_err);
}
rx_err = ice_vsi_stop_rx_rings(vsi);
if (rx_err)
@ -4328,6 +4658,18 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
dev_err(dev, "Rebuild failed, unload and reload driver\n");
}
/**
* ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* @vsi: Pointer to VSI structure
*/
static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
{
if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
else
return ICE_RXBUF_3072;
}
/**
* ice_change_mtu - NDO callback to change the MTU
* @netdev: network interface device structure
@ -4347,6 +4689,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
if (ice_is_xdp_ena_vsi(vsi)) {
int frame_size = ice_max_xdp_frame_size(vsi);
if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
netdev_err(netdev, "max MTU for XDP usage is %d\n",
frame_size - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
}
if (new_mtu < netdev->min_mtu) {
netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
netdev->min_mtu);
@ -4878,4 +5230,7 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_fdb_add = ice_fdb_add,
.ndo_fdb_del = ice_fdb_del,
.ndo_tx_timeout = ice_tx_timeout,
.ndo_bpf = ice_xdp,
.ndo_xdp_xmit = ice_xdp_xmit,
.ndo_xsk_wakeup = ice_xsk_wakeup,
};

View File

@ -5,8 +5,13 @@
#include <linux/prefetch.h>
#include <linux/mm.h>
#include <linux/bpf_trace.h>
#include <net/xdp.h>
#include "ice_txrx_lib.h"
#include "ice_lib.h"
#include "ice.h"
#include "ice_dcb_lib.h"
#include "ice_xsk.h"
#define ICE_RX_HDR_SIZE 256
@ -19,7 +24,10 @@ static void
ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
{
if (tx_buf->skb) {
dev_kfree_skb_any(tx_buf->skb);
if (ice_ring_is_xdp(ring))
page_frag_free(tx_buf->raw_buf);
else
dev_kfree_skb_any(tx_buf->skb);
if (dma_unmap_len(tx_buf, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buf, dma),
@ -51,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
{
u16 i;
if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
ice_xsk_clean_xdp_ring(tx_ring);
goto tx_skip_free;
}
/* ring already cleared, nothing to do */
if (!tx_ring->tx_buf)
return;
@ -59,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
for (i = 0; i < tx_ring->count; i++)
ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
tx_skip_free:
memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
/* Zero out the descriptor ring */
@ -136,8 +150,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
total_bytes += tx_buf->bytecount;
total_pkts += tx_buf->gso_segs;
/* free the skb */
napi_consume_skb(tx_buf->skb, napi_budget);
if (ice_ring_is_xdp(tx_ring))
page_frag_free(tx_buf->raw_buf);
else
/* free the skb */
napi_consume_skb(tx_buf->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@ -188,12 +205,11 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
i += tx_ring->count;
tx_ring->next_to_clean = i;
u64_stats_update_begin(&tx_ring->syncp);
tx_ring->stats.bytes += total_bytes;
tx_ring->stats.pkts += total_pkts;
u64_stats_update_end(&tx_ring->syncp);
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_pkts += total_pkts;
ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
if (ice_ring_is_xdp(tx_ring))
return !!budget;
netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
total_bytes);
@ -273,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
if (rx_ring->xsk_umem) {
ice_xsk_clean_rx_ring(rx_ring);
goto rx_skip_free;
}
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
@ -289,10 +310,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
*/
dma_sync_single_range_for_cpu(dev, rx_buf->dma,
rx_buf->page_offset,
ICE_RXBUF_2048, DMA_FROM_DEVICE);
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(dev, rx_buf->dma, PAGE_SIZE,
dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
@ -300,6 +322,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
rx_buf->page_offset = 0;
}
rx_skip_free:
memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
/* Zero out the descriptor ring */
@ -319,6 +342,10 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
void ice_free_rx_ring(struct ice_ring *rx_ring)
{
ice_clean_rx_ring(rx_ring);
if (rx_ring->vsi->type == ICE_VSI_PF)
if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
rx_ring->xdp_prog = NULL;
devm_kfree(rx_ring->dev, rx_ring->rx_buf);
rx_ring->rx_buf = NULL;
@ -363,6 +390,15 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
if (ice_is_xdp_ena_vsi(rx_ring->vsi))
WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
if (rx_ring->vsi->type == ICE_VSI_PF &&
!xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->q_index))
goto err;
return 0;
err:
@ -372,34 +408,110 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
}
/**
* ice_release_rx_desc - Store the new tail and head values
* @rx_ring: ring to bump
* @val: new head index
* ice_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
{
u16 prev_ntu = rx_ring->next_to_use;
if (ice_ring_uses_build_skb(rx_ring))
return ICE_SKB_PAD;
else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
return XDP_PACKET_HEADROOM;
rx_ring->next_to_use = val;
return 0;
}
/* update next to alloc since we have filled the ring */
rx_ring->next_to_alloc = val;
/**
* ice_run_xdp - Executes an XDP program on initialized xdp_buff
* @rx_ring: Rx ring
* @xdp: xdp_buff used as input to the XDP program
* @xdp_prog: XDP program to run
*
* Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
*/
static int
ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
int err, result = ICE_XDP_PASS;
struct ice_ring *xdp_ring;
u32 act;
/* QRX_TAIL will be updated with any tail value, but hardware ignores
* the lower 3 bits. This makes it so we only bump tail on meaningful
* boundaries. Also, this allows us to bump tail on intervals of 8 up to
* the budget depending on the current traffic load.
*/
val &= ~0x7;
if (prev_ntu != val) {
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
writel(val, rx_ring->tail);
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
result = ice_xmit_xdp_buff(xdp, xdp_ring);
break;
case XDP_REDIRECT:
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
break;
default:
bpf_warn_invalid_xdp_action(act);
/* fallthrough -- not supported action */
case XDP_ABORTED:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
/* fallthrough -- handle aborts by dropping frame */
case XDP_DROP:
result = ICE_XDP_CONSUMED;
break;
}
return result;
}
/**
* ice_xdp_xmit - submit packets to XDP ring for transmission
* @dev: netdev
* @n: number of XDP frames to be transmitted
* @frames: XDP frames to be transmitted
* @flags: transmit flags
*
* Returns number of frames successfully sent. Frames that fail are
* free'ed via XDP return API.
* For error cases, a negative errno code is returned and no-frames
* are transmitted (caller must handle freeing frames).
*/
int
ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags)
{
struct ice_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct ice_vsi *vsi = np->vsi;
struct ice_ring *xdp_ring;
int drops = 0, i;
if (test_bit(__ICE_DOWN, vsi->state))
return -ENETDOWN;
if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
return -ENXIO;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;
xdp_ring = vsi->xdp_rings[queue_index];
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
if (err != ICE_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
if (unlikely(flags & XDP_XMIT_FLUSH))
ice_xdp_ring_update_tail(xdp_ring);
return n - drops;
}
/**
@ -423,28 +535,28 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
}
/* alloc new page for storage */
page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, 0);
__free_pages(page, ice_rx_pg_order(rx_ring));
rx_ring->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
bi->page_offset = ice_rx_offset(rx_ring);
page_ref_add(page, USHRT_MAX - 1);
bi->pagecnt_bias = USHRT_MAX;
@ -486,7 +598,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
ICE_RXBUF_2048,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
@ -557,9 +669,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
*/
static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{
#if (PAGE_SIZE >= 8192)
unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
#endif
unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
struct page *page = rx_buf->page;
@ -572,7 +681,9 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
if (unlikely((page_count(page) - pagecnt_bias) > 1))
return false;
#else
if (rx_buf->page_offset > last_offset)
#define ICE_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
if (rx_buf->page_offset > ICE_LAST_OFFSET)
return false;
#endif /* PAGE_SIZE < 8192) */
@ -590,6 +701,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
/**
* ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: buffer containing page to add
* @skb: sk_buff to place the data into
* @size: packet length from rx_desc
@ -599,13 +711,13 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
* The function will then update the page offset.
*/
static void
ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int size)
ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct sk_buff *skb, unsigned int size)
{
#if (PAGE_SIZE >= 8192)
unsigned int truesize = SKB_DATA_ALIGN(size);
unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring));
#else
unsigned int truesize = ICE_RXBUF_2048;
unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
if (!size)
@ -678,11 +790,65 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
return rx_buf;
}
/**
* ice_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
* @xdp: xdp_buff pointing to the data
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *
ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct xdp_buff *xdp)
{
unsigned int metasize = xdp->data - xdp->data_meta;
#if (PAGE_SIZE < 8192)
unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif
struct sk_buff *skb;
/* Prefetch first cache line of first page. If xdp->data_meta
* is unused, this points exactly as xdp->data, otherwise we
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
prefetch((void *)(xdp->data + L1_CACHE_BYTES));
#endif
/* build an skb around the page buffer */
skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb))
return NULL;
/* must to record Rx queue, otherwise OS features such as
* symmetric queue won't work
*/
skb_record_rx_queue(skb, rx_ring->q_index);
/* update pointers within the skb to store the data */
skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, xdp->data_end - xdp->data);
if (metasize)
skb_metadata_set(skb, metasize);
/* buffer is used by skb, update page_offset */
ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
return skb;
}
/**
* ice_construct_skb - Allocate skb and populate it
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
* @size: the length of the packet
* @xdp: xdp_buff pointing to the data
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
@ -690,16 +856,16 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
*/
static struct sk_buff *
ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
unsigned int size)
struct xdp_buff *xdp)
{
void *va = page_address(rx_buf->page) + rx_buf->page_offset;
unsigned int size = xdp->data_end - xdp->data;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
prefetch(va);
prefetch(xdp->data);
#if L1_CACHE_BYTES < 128
prefetch((u8 *)va + L1_CACHE_BYTES);
prefetch((void *)(xdp->data + L1_CACHE_BYTES));
#endif /* L1_CACHE_BYTES */
/* allocate a skb to store the frags */
@ -712,10 +878,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
/* Determine available headroom for copy */
headlen = size;
if (headlen > ICE_RX_HDR_SIZE)
headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE);
headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
@ -723,7 +890,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
#if (PAGE_SIZE >= 8192)
unsigned int truesize = SKB_DATA_ALIGN(size);
#else
unsigned int truesize = ICE_RXBUF_2048;
unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
skb_add_rx_frag(skb, 0, rx_buf->page,
rx_buf->page_offset + headlen, size, truesize);
@ -745,11 +912,18 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
*
* This function will clean up the contents of the rx_buf. It will
* either recycle the buffer or unmap it and free the associated resources.
* This function will update next_to_clean and then clean up the contents
* of the rx_buf. It will either recycle the buffer or unmap it and free
* the associated resources.
*/
static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
u32 ntc = rx_ring->next_to_clean + 1;
/* fetch, update, and store next to clean */
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
if (!rx_buf)
return;
@ -759,8 +933,9 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
rx_ring->rx_stats.page_reuse_count++;
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
}
@ -790,206 +965,31 @@ static bool ice_cleanup_headers(struct sk_buff *skb)
return false;
}
/**
* ice_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
* @stat_err_bits: value to mask
*
* This function does some fast chicanery in order to return the
* value of the mask which is really only used for boolean tests.
* The status_error_len doesn't need to be shifted because it begins
* at offset zero.
*/
static bool
ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
{
return !!(rx_desc->wb.status_error0 &
cpu_to_le16(stat_err_bits));
}
/**
* ice_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
* @skb: Current socket buffer containing buffer in progress
*
* This function updates next to clean. If the buffer is an EOP buffer
* this function exits returning false, otherwise it will place the
* sk_buff in the next buffer to be chained and return true indicating
* that this is in fact a non-EOP buffer.
* If the buffer is an EOP buffer, this function exits returning false,
* otherwise return true indicating that this is in fact a non-EOP buffer.
*/
static bool
ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb)
{
u32 ntc = rx_ring->next_to_clean + 1;
/* fetch, update, and store next to clean */
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(ICE_RX_DESC(rx_ring, ntc));
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
return false;
/* place skb in next buffer to be received */
rx_ring->rx_buf[ntc].skb = skb;
rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb;
rx_ring->rx_stats.non_eop_descs++;
return true;
}
/**
* ice_ptype_to_htype - get a hash type
* @ptype: the ptype value from the descriptor
*
* Returns a hash type to be used by skb_set_hash
*/
static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
{
return PKT_HASH_TYPE_NONE;
}
/**
* ice_rx_hash - set the hash value in the skb
* @rx_ring: descriptor ring
* @rx_desc: specific descriptor
* @skb: pointer to current skb
* @rx_ptype: the ptype value from the descriptor
*/
static void
ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb, u8 rx_ptype)
{
struct ice_32b_rx_flex_desc_nic *nic_mdid;
u32 hash;
if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
return;
if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
return;
nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
hash = le32_to_cpu(nic_mdid->rss_hash);
skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
}
/**
* ice_rx_csum - Indicate in skb if checksum is good
* @ring: the ring we care about
* @skb: skb currently being received and modified
* @rx_desc: the receive descriptor
* @ptype: the packet type decoded by hardware
*
* skb->protocol must be set before this function is called
*/
static void
ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
{
struct ice_rx_ptype_decoded decoded;
u32 rx_error, rx_status;
bool ipv4, ipv6;
rx_status = le16_to_cpu(rx_desc->wb.status_error0);
rx_error = rx_status;
decoded = ice_decode_rx_desc_ptype(ptype);
/* Start with CHECKSUM_NONE and by default csum_level = 0 */
skb->ip_summed = CHECKSUM_NONE;
skb_checksum_none_assert(skb);
/* check if Rx checksum is enabled */
if (!(ring->netdev->features & NETIF_F_RXCSUM))
return;
/* check if HW has decoded the packet and checksum */
if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
return;
if (!(decoded.known && decoded.outer_ip))
return;
ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
(decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
(decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
goto checksum_fail;
else if (ipv6 && (rx_status &
(BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
goto checksum_fail;
/* check for L4 errors and handle packets that were not able to be
* checksummed due to arrival speed
*/
if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
goto checksum_fail;
/* Only report checksum unnecessary for TCP, UDP, or SCTP */
switch (decoded.inner_prot) {
case ICE_RX_PTYPE_INNER_PROT_TCP:
case ICE_RX_PTYPE_INNER_PROT_UDP:
case ICE_RX_PTYPE_INNER_PROT_SCTP:
skb->ip_summed = CHECKSUM_UNNECESSARY;
default:
break;
}
return;
checksum_fail:
ring->vsi->back->hw_csum_rx_error++;
}
/**
* ice_process_skb_fields - Populate skb header fields from Rx descriptor
* @rx_ring: Rx descriptor ring packet is being transacted on
* @rx_desc: pointer to the EOP Rx descriptor
* @skb: pointer to current skb being populated
* @ptype: the packet type decoded by hardware
*
* This function checks the ring, descriptor, and packet information in
* order to populate the hash, checksum, VLAN, protocol, and
* other fields within the skb.
*/
static void
ice_process_skb_fields(struct ice_ring *rx_ring,
union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb, u8 ptype)
{
ice_rx_hash(rx_ring, rx_desc, skb, ptype);
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
ice_rx_csum(rx_ring, skb, rx_desc, ptype);
}
/**
* ice_receive_skb - Send a completed packet up the stack
* @rx_ring: Rx ring in play
* @skb: packet to send up
* @vlan_tag: VLAN tag for packet
*
* This function sends the completed packet (via. skb) up the stack using
* gro receive functions (with/without VLAN tag)
*/
static void
ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
{
if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
(vlan_tag & VLAN_VID_MASK))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
/**
* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: Rx descriptor ring to transact packets on
@ -1006,8 +1006,13 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
unsigned int xdp_res, xdp_xmit = 0;
struct bpf_prog *xdp_prog = NULL;
struct xdp_buff xdp;
bool failure;
xdp.rxq = &rx_ring->xdp_rxq;
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
@ -1042,10 +1047,57 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
if (!size) {
xdp.data = NULL;
xdp.data_end = NULL;
xdp.data_hard_start = NULL;
xdp.data_meta = NULL;
goto construct_skb;
}
xdp.data = page_address(rx_buf->page) + rx_buf->page_offset;
xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (!xdp_prog) {
rcu_read_unlock();
goto construct_skb;
}
xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog);
rcu_read_unlock();
if (!xdp_res)
goto construct_skb;
if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = ice_rx_pg_size(rx_ring) / 2;
#else
truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
size);
#endif
xdp_xmit |= xdp_res;
ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
} else {
rx_buf->pagecnt_bias++;
}
total_rx_bytes += size;
total_rx_pkts++;
cleaned_count++;
ice_put_rx_buf(rx_ring, rx_buf);
continue;
construct_skb:
if (skb)
ice_add_rx_frag(rx_buf, skb, size);
ice_add_rx_frag(rx_ring, rx_buf, skb, size);
else if (ice_ring_uses_build_skb(rx_ring))
skb = ice_build_skb(rx_ring, rx_buf, &xdp);
else
skb = ice_construct_skb(rx_ring, rx_buf, size);
skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@ -1099,13 +1151,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* return up to cleaned_count buffers to hardware */
failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
/* update queue and vector specific stats */
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.pkts += total_rx_pkts;
rx_ring->stats.bytes += total_rx_bytes;
u64_stats_update_end(&rx_ring->syncp);
rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
if (xdp_prog)
ice_finalize_xdp_rx(rx_ring, xdp_xmit);
ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);
/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_pkts;
@ -1483,9 +1532,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
ice_for_each_ring(ring, q_vector->tx)
if (!ice_clean_tx_irq(ring, budget))
ice_for_each_ring(ring, q_vector->tx) {
bool wd = ring->xsk_umem ?
ice_clean_tx_irq_zc(ring, budget) :
ice_clean_tx_irq(ring, budget);
if (!wd)
clean_complete = false;
}
/* Handle case where we are called by netpoll with a budget of 0 */
if (unlikely(budget <= 0))
@ -1505,7 +1559,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
ice_for_each_ring(ring, q_vector->rx) {
int cleaned;
cleaned = ice_clean_rx_irq(ring, budget_per_ring);
/* A dedicated path for zero-copy allows making a single
* comparison in the irq context instead of many inside the
* ice_clean_rx_irq function and makes the codebase cleaner.
*/
cleaned = ring->xsk_umem ?
ice_clean_rx_irq_zc(ring, budget_per_ring) :
ice_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
if (cleaned >= budget_per_ring)
@ -1527,17 +1587,6 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
return min_t(int, work_done, budget - 1);
}
/* helper function for building cmd/type/offset */
static __le64
build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
(td_cmd << ICE_TXD_QW1_CMD_S) |
(td_offset << ICE_TXD_QW1_OFFSET_S) |
((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
(td_tag << ICE_TXD_QW1_L2TAG1_S));
}
/**
* __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
* @tx_ring: the ring to be checked
@ -1689,9 +1738,9 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
i = 0;
/* write last descriptor with RS and EOP bits */
td_cmd |= (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
tx_desc->cmd_type_offset_bsz =
build_ctob(td_cmd, td_offset, size, td_tag);
td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
td_tag);
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.

View File

@ -4,8 +4,12 @@
#ifndef _ICE_TXRX_H_
#define _ICE_TXRX_H_
#include "ice_type.h"
#define ICE_DFLT_IRQ_WORK 256
#define ICE_RXBUF_3072 3072
#define ICE_RXBUF_2048 2048
#define ICE_RXBUF_1536 1536
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
@ -22,6 +26,71 @@
#define ICE_RX_BUF_WRITE 16 /* Must be power of 2 */
#define ICE_MAX_TXQ_PER_TXQG 128
/* Attempt to maximize the headroom available for incoming frames. We use a 2K
* buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
* This leaves us with 512 bytes of room. From that we need to deduct the
* space needed for the shared info and the padding needed to IP align the
* frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define ICE_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
/**
* ice_compute_pad - compute the padding
* rx_buf_len: buffer length
*
* Figure out the size of half page based on given buffer length and
* then subtract the skb_shared_info followed by subtraction of the
* actual buffer length; this in turn results in the actual space that
* is left for padding usage
*/
static inline int ice_compute_pad(int rx_buf_len)
{
int half_page_size;
half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
}
/**
* ice_skb_pad - determine the padding that we can supply
*
* Figure out the right Rx buffer size and based on that calculate the
* padding
*/
static inline int ice_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (ICE_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = ICE_RXBUF_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return ice_compute_pad(rx_buf_len);
}
#define ICE_SKB_PAD ice_skb_pad()
#else
#define ICE_2K_TOO_SMALL_WITH_PADDING false
#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/* We are assuming that the cache line is always 64 Bytes here for ice.
* In order to make sure that is a correct assumption there is a check in probe
* to print a warning if the read from GLPCI_CNF2 tells us that the cache line
@ -49,12 +118,24 @@
#define ICE_TX_FLAGS_VLAN_PR_S 29
#define ICE_TX_FLAGS_VLAN_S 16
#define ICE_XDP_PASS 0
#define ICE_XDP_CONSUMED BIT(0)
#define ICE_XDP_TX BIT(1)
#define ICE_XDP_REDIR BIT(2)
#define ICE_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
#define ICE_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
struct ice_tx_buf {
struct ice_tx_desc *next_to_watch;
struct sk_buff *skb;
union {
struct sk_buff *skb;
void *raw_buf; /* used for XDP */
};
unsigned int bytecount;
unsigned short gso_segs;
u32 tx_flags;
@ -76,9 +157,17 @@ struct ice_tx_offload_params {
struct ice_rx_buf {
struct sk_buff *skb;
dma_addr_t dma;
struct page *page;
unsigned int page_offset;
u16 pagecnt_bias;
union {
struct {
struct page *page;
unsigned int page_offset;
u16 pagecnt_bias;
};
struct {
void *addr;
u64 handle;
};
};
};
struct ice_q_stats {
@ -198,18 +287,44 @@ struct ice_ring {
};
struct rcu_head rcu; /* to avoid race on free */
struct bpf_prog *xdp_prog;
struct xdp_umem *xsk_umem;
struct zero_copy_allocator zca;
/* CL3 - 3rd cacheline starts here */
struct xdp_rxq_info xdp_rxq;
/* CLX - the below items are only accessed infrequently and should be
* in their own cache line if possible
*/
#define ICE_TX_FLAGS_RING_XDP BIT(0)
#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
u8 flags;
dma_addr_t dma; /* physical address of ring */
unsigned int size; /* length of descriptor ring in bytes */
u32 txq_teid; /* Added Tx queue TEID */
u16 rx_buf_len;
#ifdef CONFIG_DCB
u8 dcb_tc; /* Traffic class of ring */
#endif /* CONFIG_DCB */
} ____cacheline_internodealigned_in_smp;
static inline bool ice_ring_uses_build_skb(struct ice_ring *ring)
{
return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
}
static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring)
{
ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
}
static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring)
{
ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
}
static inline bool ice_ring_is_xdp(struct ice_ring *ring)
{
return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
}
struct ice_ring_container {
/* head of linked-list of rings */
struct ice_ring *ring;
@ -230,6 +345,19 @@ struct ice_ring_container {
#define ice_for_each_ring(pos, head) \
for (pos = (head).ring; pos; pos = pos->next)
static inline unsigned int ice_rx_pg_order(struct ice_ring *ring)
{
#if (PAGE_SIZE < 8192)
if (ring->rx_buf_len > (PAGE_SIZE / 2))
return 1;
#endif
return 0;
}
#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
union ice_32b_rx_flex_desc;
bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count);
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
void ice_clean_tx_ring(struct ice_ring *tx_ring);

View File

@ -0,0 +1,273 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019, Intel Corporation. */
#include "ice_txrx_lib.h"
/**
* ice_release_rx_desc - Store the new tail and head values
* @rx_ring: ring to bump
* @val: new head index
*/
void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
{
u16 prev_ntu = rx_ring->next_to_use;
rx_ring->next_to_use = val;
/* update next to alloc since we have filled the ring */
rx_ring->next_to_alloc = val;
/* QRX_TAIL will be updated with any tail value, but hardware ignores
* the lower 3 bits. This makes it so we only bump tail on meaningful
* boundaries. Also, this allows us to bump tail on intervals of 8 up to
* the budget depending on the current traffic load.
*/
val &= ~0x7;
if (prev_ntu != val) {
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
writel(val, rx_ring->tail);
}
}
/**
* ice_ptype_to_htype - get a hash type
* @ptype: the ptype value from the descriptor
*
* Returns a hash type to be used by skb_set_hash
*/
static enum pkt_hash_types ice_ptype_to_htype(u8 __always_unused ptype)
{
return PKT_HASH_TYPE_NONE;
}
/**
* ice_rx_hash - set the hash value in the skb
* @rx_ring: descriptor ring
* @rx_desc: specific descriptor
* @skb: pointer to current skb
* @rx_ptype: the ptype value from the descriptor
*/
static void
ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb, u8 rx_ptype)
{
struct ice_32b_rx_flex_desc_nic *nic_mdid;
u32 hash;
if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
return;
if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
return;
nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
hash = le32_to_cpu(nic_mdid->rss_hash);
skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
}
/**
* ice_rx_csum - Indicate in skb if checksum is good
* @ring: the ring we care about
* @skb: skb currently being received and modified
* @rx_desc: the receive descriptor
* @ptype: the packet type decoded by hardware
*
* skb->protocol must be set before this function is called
*/
static void
ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
{
struct ice_rx_ptype_decoded decoded;
u32 rx_error, rx_status;
bool ipv4, ipv6;
rx_status = le16_to_cpu(rx_desc->wb.status_error0);
rx_error = rx_status;
decoded = ice_decode_rx_desc_ptype(ptype);
/* Start with CHECKSUM_NONE and by default csum_level = 0 */
skb->ip_summed = CHECKSUM_NONE;
skb_checksum_none_assert(skb);
/* check if Rx checksum is enabled */
if (!(ring->netdev->features & NETIF_F_RXCSUM))
return;
/* check if HW has decoded the packet and checksum */
if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
return;
if (!(decoded.known && decoded.outer_ip))
return;
ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
(decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
(decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
goto checksum_fail;
else if (ipv6 && (rx_status &
(BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
goto checksum_fail;
/* check for L4 errors and handle packets that were not able to be
* checksummed due to arrival speed
*/
if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
goto checksum_fail;
/* Only report checksum unnecessary for TCP, UDP, or SCTP */
switch (decoded.inner_prot) {
case ICE_RX_PTYPE_INNER_PROT_TCP:
case ICE_RX_PTYPE_INNER_PROT_UDP:
case ICE_RX_PTYPE_INNER_PROT_SCTP:
skb->ip_summed = CHECKSUM_UNNECESSARY;
default:
break;
}
return;
checksum_fail:
ring->vsi->back->hw_csum_rx_error++;
}
/**
* ice_process_skb_fields - Populate skb header fields from Rx descriptor
* @rx_ring: Rx descriptor ring packet is being transacted on
* @rx_desc: pointer to the EOP Rx descriptor
* @skb: pointer to current skb being populated
* @ptype: the packet type decoded by hardware
*
* This function checks the ring, descriptor, and packet information in
* order to populate the hash, checksum, VLAN, protocol, and
* other fields within the skb.
*/
void
ice_process_skb_fields(struct ice_ring *rx_ring,
union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb, u8 ptype)
{
ice_rx_hash(rx_ring, rx_desc, skb, ptype);
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
ice_rx_csum(rx_ring, skb, rx_desc, ptype);
}
/**
* ice_receive_skb - Send a completed packet up the stack
* @rx_ring: Rx ring in play
* @skb: packet to send up
* @vlan_tag: VLAN tag for packet
*
* This function sends the completed packet (via. skb) up the stack using
* gro receive functions (with/without VLAN tag)
*/
void
ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
{
if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
(vlan_tag & VLAN_VID_MASK))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
/**
* ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
* @data: packet data pointer
* @size: packet data size
* @xdp_ring: XDP ring for transmission
*/
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
{
u16 i = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
xdp_ring->tx_stats.tx_busy++;
return ICE_XDP_CONSUMED;
}
dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(xdp_ring->dev, dma))
return ICE_XDP_CONSUMED;
tx_buf = &xdp_ring->tx_buf[i];
tx_buf->bytecount = size;
tx_buf->gso_segs = 1;
tx_buf->raw_buf = data;
/* record length, and DMA address */
dma_unmap_len_set(tx_buf, len, size);
dma_unmap_addr_set(tx_buf, dma, dma);
tx_desc = ICE_TX_DESC(xdp_ring, i);
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
size, 0);
/* Make certain all of the status bits have been updated
* before next_to_watch is written.
*/
smp_wmb();
i++;
if (i == xdp_ring->count)
i = 0;
tx_buf->next_to_watch = tx_desc;
xdp_ring->next_to_use = i;
return ICE_XDP_TX;
}
/**
* ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
* @xdp: XDP buffer
* @xdp_ring: XDP Tx ring
*
* Returns negative on failure, 0 on success.
*/
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
{
struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return ICE_XDP_CONSUMED;
return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
}
/**
* ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
* @rx_ring: Rx ring
* @xdp_res: Result of the receive batch
*
* This function bumps XDP Tx tail and/or flush redirect map, and
* should be called when a batch of packets has been processed in the
* napi loop.
*/
void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res)
{
if (xdp_res & ICE_XDP_REDIR)
xdp_do_flush_map();
if (xdp_res & ICE_XDP_TX) {
struct ice_ring *xdp_ring =
rx_ring->vsi->xdp_rings[rx_ring->q_index];
ice_xdp_ring_update_tail(xdp_ring);
}
}

View File

@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019, Intel Corporation. */
#ifndef _ICE_TXRX_LIB_H_
#define _ICE_TXRX_LIB_H_
#include "ice.h"
/**
* ice_test_staterr - tests bits in Rx descriptor status and error fields
* @rx_desc: pointer to receive descriptor (in le64 format)
* @stat_err_bits: value to mask
*
* This function does some fast chicanery in order to return the
* value of the mask which is really only used for boolean tests.
* The status_error_len doesn't need to be shifted because it begins
* at offset zero.
*/
static inline bool
ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
{
return !!(rx_desc->wb.status_error0 & cpu_to_le16(stat_err_bits));
}
static inline __le64
build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
(td_cmd << ICE_TXD_QW1_CMD_S) |
(td_offset << ICE_TXD_QW1_OFFSET_S) |
((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
(td_tag << ICE_TXD_QW1_L2TAG1_S));
}
/**
* ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
* @xdp_ring: XDP Tx ring
*
* This function updates the XDP Tx ring tail register.
*/
static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
{
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch.
*/
wmb();
writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}
void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
void
ice_process_skb_fields(struct ice_ring *rx_ring,
union ice_32b_rx_flex_desc *rx_desc,
struct sk_buff *skb, u8 ptype);
void
ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
#endif /* !_ICE_TXRX_LIB_H_ */

View File

@ -2,6 +2,7 @@
/* Copyright (c) 2018, Intel Corporation. */
#include "ice.h"
#include "ice_base.h"
#include "ice_lib.h"
/**

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,72 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019, Intel Corporation. */
#ifndef _ICE_XSK_H_
#define _ICE_XSK_H_
#include "ice_txrx.h"
#include "ice.h"
struct ice_vsi;
#ifdef CONFIG_XDP_SOCKETS
int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
#else
static inline int
ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
struct xdp_umem __always_unused *umem,
u16 __always_unused qid)
{
return -ENOTSUPP;
}
static inline void
ice_zca_free(struct zero_copy_allocator __always_unused *zca,
unsigned long __always_unused handle)
{
}
static inline int
ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
int __always_unused budget)
{
return 0;
}
static inline bool
ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
int __always_unused budget)
{
return false;
}
static inline bool
ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
u16 __always_unused count)
{
return false;
}
static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
{
return false;
}
static inline int
ice_xsk_wakeup(struct net_device __always_unused *netdev,
u32 __always_unused queue_id, u32 __always_unused flags)
{
return -ENOTSUPP;
}
#define ice_xsk_clean_rx_ring(rx_ring) do {} while (0)
#define ice_xsk_clean_xdp_ring(xdp_ring) do {} while (0)
#endif /* CONFIG_XDP_SOCKETS */
#endif /* !_ICE_XSK_H_ */