net/mlx5e: Properly deal with encap flows add/del under neigh update

Currently, the encap action offload is handled in the actions parse
function and not in mlx5e_tc_add_fdb_flow() where we deal with all
the other aspects of offloading actions (vlan, modify header) and
the rule itself.

When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the
encap entry and offloads the related flows, we wrongly call again into
mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle
again the offloading of vlans and header re-write which puts things
in non consistent state and step on freed memory (e.g the modify
header parse buffer which is already freed).

Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the
encap entry, it causes a corruption at the neigh update code which goes
over the list of flows associated with this encap entry, or double free
when the tc flow is later deleted by user-space.

When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related
to an encap entry which is now invalid, we do a partial repeat of the eswitch
flow removal code which is wrong too.

To fix things up we do the following:

(1) handle the encap action offload in the eswitch flow add function
    mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself.

(2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only
    deal with the encap entry and rules delete/add and not with any of
    the other offloaded actions.

Fixes: 232c001398 ('net/mlx5e: Add support to neighbour update flow')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
This commit is contained in:
Or Gerlitz 2017-10-17 12:33:43 +02:00 committed by Saeed Mahameed
parent 4ca637a20a
commit 3c37745ec6
1 changed files with 54 additions and 35 deletions

View File

@ -78,9 +78,11 @@ struct mlx5e_tc_flow {
};
struct mlx5e_tc_flow_parse_attr {
struct ip_tunnel_info tun_info;
struct mlx5_flow_spec spec;
int num_mod_hdr_actions;
void *mod_hdr_actions;
int mirred_ifindex;
};
enum {
@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow);
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct ip_tunnel_info *tun_info,
struct net_device *mirred_dev,
struct net_device **encap_dev,
struct mlx5e_tc_flow *flow);
static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5_flow_handle *rule;
struct net_device *out_dev, *encap_dev = NULL;
struct mlx5_flow_handle *rule = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
int err;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
out_dev = __dev_get_by_index(dev_net(priv->netdev),
attr->parse_attr->mirred_ifindex);
err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
out_dev, &encap_dev, flow);
if (err) {
rule = ERR_PTR(err);
if (err != -EAGAIN)
goto err_attach_encap;
}
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
attr->out_rep = rpriv->rep;
}
err = mlx5_eswitch_add_vlan_action(esw, attr);
if (err) {
rule = ERR_PTR(err);
@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
}
rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
if (IS_ERR(rule))
goto err_add_rule;
/* we get here if (1) there's no error (rule being null) or when
* (2) there's an encap action and we're on -EAGAIN (no valid neigh)
*/
if (rule != ERR_PTR(-EAGAIN)) {
rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
if (IS_ERR(rule))
goto err_add_rule;
}
return rule;
err_add_rule:
@ -361,6 +391,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
err_add_vlan:
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
mlx5e_detach_encap(priv, flow);
err_attach_encap:
return rule;
}
@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5e_tc_flow *flow;
int err;
@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
mlx5e_rep_queue_neigh_stats_work(priv);
list_for_each_entry(flow, &e->flows, encap) {
flow->esw_attr->encap_id = e->encap_id;
flow->rule = mlx5e_tc_add_fdb_flow(priv,
flow->esw_attr->parse_attr,
flow);
esw_attr = flow->esw_attr;
esw_attr->encap_id = e->encap_id;
flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
if (IS_ERR(flow->rule)) {
err = PTR_ERR(flow->rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow *flow;
struct mlx5_fc *counter;
list_for_each_entry(flow, &e->flows, encap) {
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
counter = mlx5_flow_rule_counter(flow->rule);
mlx5_del_flow_rules(flow->rule);
mlx5_fc_destroy(priv->mdev, counter);
mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);
}
}
@ -1942,7 +1972,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (is_tcf_mirred_egress_redirect(a)) {
int ifindex = tcf_mirred_ifindex(a);
struct net_device *out_dev, *encap_dev = NULL;
struct net_device *out_dev;
struct mlx5e_priv *out_priv;
out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
@ -1955,17 +1985,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
rpriv = out_priv->ppriv;
attr->out_rep = rpriv->rep;
} else if (encap) {
err = mlx5e_attach_encap(priv, info,
out_dev, &encap_dev, flow);
if (err && err != -EAGAIN)
return err;
parse_attr->mirred_ifindex = ifindex;
parse_attr->tun_info = *info;
attr->parse_attr = parse_attr;
attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
attr->out_rep = rpriv->rep;
attr->parse_attr = parse_attr;
/* attr->out_rep is resolved when we handle encap */
} else {
pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
priv->netdev->name, out_dev->name);
@ -2047,7 +2073,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
if (err < 0)
goto err_handle_encap_flow;
goto err_free;
flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
} else {
err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
@ -2058,10 +2084,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
if (IS_ERR(flow->rule)) {
err = PTR_ERR(flow->rule);
goto err_free;
if (err != -EAGAIN)
goto err_free;
}
flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
if (err != -EAGAIN)
flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
err = rhashtable_insert_fast(&tc->ht, &flow->node,
tc->ht_params);
if (err)
@ -2075,16 +2104,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
err_del_rule:
mlx5e_tc_del_flow(priv, flow);
err_handle_encap_flow:
if (err == -EAGAIN) {
err = rhashtable_insert_fast(&tc->ht, &flow->node,
tc->ht_params);
if (err)
mlx5e_tc_del_flow(priv, flow);
else
return 0;
}
err_free:
kvfree(parse_attr);
kfree(flow);