net/mlx5e: Re-attempt to offload flows on multipath port affinity events

Under multipath it's possible for us to offload the flow only through
the e-switch for which proper route through the uplink exists.
When the port is up and the next-hop route is set again we want to
offload through it as well.

We generate SW event from the FIB event handler when multipath port
affinity changes. The tc offloads code gets this event, goes over the
flows which were marked as of having missing route and attempts to
offload them.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
Roi Dayan 2019-02-06 15:43:51 +02:00 committed by Saeed Mahameed
parent 6997b1c9ca
commit b4a23329e2
4 changed files with 71 additions and 12 deletions

View File

@ -1573,6 +1573,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
uplink_priv = &rpriv->uplink_priv;
INIT_LIST_HEAD(&uplink_priv->unready_flows);
/* init shared tc flow table */
err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
if (err)
@ -1632,27 +1634,38 @@ static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv)
static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data)
{
struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
struct mlx5_eqe *eqe = data;
if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
return NOTIFY_DONE;
if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
struct mlx5_eqe *eqe = data;
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
return NOTIFY_DONE;
switch (eqe->sub_type) {
case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
queue_work(priv->wq, &priv->update_carrier_work);
break;
default:
return NOTIFY_DONE;
}
return NOTIFY_OK;
}
return NOTIFY_OK;
if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
struct mlx5e_rep_priv *rpriv = priv->ppriv;
queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
return NOTIFY_OK;
}
return NOTIFY_DONE;
}
static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
u16 max_mtu;
netdev->min_mtu = ETH_MIN_MTU;
@ -1660,6 +1673,9 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
mlx5e_set_dev_port_mtu(priv);
INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
mlx5e_tc_reoffload_flows_work);
mlx5_lag_add(mdev, netdev);
priv->events_nb.notifier_call = uplink_rep_async_event;
mlx5_notifier_register(mdev, &priv->events_nb);
@ -1672,11 +1688,13 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_delete_app(priv);
#endif
mlx5_notifier_unregister(mdev, &priv->events_nb);
cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
mlx5_lag_remove(mdev);
}

View File

@ -74,6 +74,9 @@ struct mlx5_rep_uplink_priv {
struct notifier_block netdevice_nb;
struct mlx5_tun_entropy tun_entropy;
struct list_head unready_flows;
struct work_struct reoffload_flows_work;
};
struct mlx5e_rep_priv {

View File

@ -117,6 +117,7 @@ struct mlx5e_tc_flow {
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
struct list_head hairpin; /* flows sharing the same hairpin */
struct list_head peer; /* flows with peer flow */
struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */
union {
struct mlx5_esw_flow_attr esw_attr[0];
struct mlx5_nic_flow_attr nic_attr[0];
@ -928,6 +929,26 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
flow->flags &= ~MLX5E_TC_FLOW_SLOW;
}
static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *rpriv;
struct mlx5_eswitch *esw;
esw = flow->priv->mdev->priv.eswitch;
rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &rpriv->uplink_priv;
flow->flags |= MLX5E_TC_FLOW_NOT_READY;
list_add_tail(&flow->unready, &uplink_priv->unready_flows);
}
static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
list_del(&flow->unready);
flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
}
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
@ -1049,6 +1070,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
int out_index;
if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
remove_unready_flow(flow);
kvfree(attr->parse_attr);
return;
}
@ -2810,7 +2832,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
goto err_free;
flow->flags |= MLX5E_TC_FLOW_NOT_READY;
add_unready_flow(flow);
}
return flow;
@ -3214,3 +3236,18 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
__mlx5e_tc_del_fdb_peer_flow(flow);
}
void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
struct mlx5_rep_uplink_priv *rpriv =
container_of(work, struct mlx5_rep_uplink_priv,
reoffload_flows_work);
struct mlx5e_tc_flow *flow, *tmp;
rtnl_lock();
list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
remove_unready_flow(flow);
}
rtnl_unlock();
}

View File

@ -72,6 +72,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }