From abf4bb6b63d0a54266f8e7eff3720c1974063971 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:06 +0200 Subject: [PATCH 1/7] skbuff: Add the offload_mr_fwd_mark field Similarly to the offload_fwd_mark field, the offload_mr_fwd_mark field is used to allow partial offloading of MFC multicast routes. Switchdev drivers can offload MFC multicast routes to the hardware by registering to the FIB notification chain. When one of the route output interfaces is not offload-able, i.e. has a different parent ID, the route cannot be fully offloaded by the hardware. Examples of non-offload-able devices are a management NIC, dummy device, pimreg device, etc. A similar problem exists in the bridge module, as one bridge can hold interfaces with different parent IDs. At the bridge, the problem is solved by the offload_fwd_mark skb field. Currently, when a route cannot go through full offload, the only solution for a switchdev driver is not to offload it at all and let the packet go through the slow path. Using the offload_mr_fwd_mark field, a driver can indicate that a packet was already forwarded by hardware to all the devices with the same parent ID as the input device. Further patches in this patch-set are going to enhance ipmr to skip multicast forwarding to devices with the same parent ID if a packet is marked with that field. The reason why the already existing "offload_fwd_mark" bit cannot be used is that a switchdev driver would want to make the distinction between a packet that has already gone through L2 forwarding but did not go through multicast forwarding, and a packet that has already gone through both L2 and multicast forwarding. For example: when a packet is ingressing from a switchport enslaved to a bridge, which is configured with multicast forwarding, the following scenarios are possible: - The packet can be trapped to the CPU due to an exception during multicast forwarding (for example, MTU error). 
In that case, it had already gone through L2 forwarding in the hardware, thus a switchdev driver would want to set the skb->offload_fwd_mark and not the skb->offload_mr_fwd_mark. - The packet can also be trapped due to a pimreg/dummy device used as one of the output interfaces. In that case, it can go through both L2 and (partial) multicast forwarding inside the hardware, thus a switchdev driver would want to set both the skb->offload_fwd_mark and skb->offload_mr_fwd_mark. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 19e64bfb1a66..ada821466e88 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -772,6 +772,7 @@ struct sk_buff { __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; + __u8 offload_mr_fwd_mark:1; #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; From 5d8b3e69fc5e5ccafc9db1251bb7c78a8622fddd Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:07 +0200 Subject: [PATCH 2/7] ipv4: ipmr: Add the parent ID field to VIF struct In order to allow the ipmr module to do partial multicast forwarding according to the device parent ID, add the device parent ID field to the VIF struct. This way, the forwarding path can use the parent ID field without invoking switchdev calls, which require the RTNL lock. When a new VIF is added, set the device parent ID field in it by invoking the switchdev_port_attr_get call. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/mroute.h | 1 + net/ipv4/ipmr.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index b072a84fbe1c..8242d05df35e 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -57,6 +57,7 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule) struct vif_device { struct net_device *dev; /* Device we are using */ + struct netdev_phys_item_id dev_parent_id; /* Device parent ID */ unsigned long bytes_in,bytes_out; unsigned long pkt_in,pkt_out; /* Statistics */ unsigned long rate_limit; /* Traffic shaping (NI) */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a844738b38bd..1b161ada7ae6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,6 +67,7 @@ #include #include #include +#include struct ipmr_rule { struct fib_rule common; @@ -868,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; @@ -942,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* Fill in the VIF structures */ + attr.orig_dev = dev; + if (!switchdev_port_attr_get(dev, &attr)) { + memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); + v->dev_parent_id.id_len = attr.u.ppid.id_len; + } else { + v->dev_parent_id.id_len = 0; + } v->rate_limit = vifc->vifc_rate_limit; v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; From a5bc9294d70fe85729bb343eef281ccbe78ff119 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:08 +0200 Subject: [PATCH 3/7] ipv4: ipmr: Don't forward packets already forwarded by hardware Change the ipmr module to not forward packets if: - The packet is marked with the offload_mr_fwd_mark, and - Both input interface and output interface share the 
same parent ID. This way, a packet can go through partial multicast forwarding in the hardware, where it will be forwarded only to the devices that share the same parent ID (AKA, reside inside the same hardware). The kernel will forward the packet to all other interfaces. To do this, add the ipmr_forward_offloaded helper, which, per skb, ingress VIF and egress VIF, returns whether the forwarding was offloaded to hardware. The ipmr_queue_xmit frees the skb and does not forward it if the result is a true value. All the forwarding path code compiles out when the CONFIG_NET_SWITCHDEV is not set. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1b161ada7ae6..b3ee01b0551b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1859,10 +1859,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } +#ifdef CONFIG_NET_SWITCHDEV +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + struct vif_device *out_vif = &mrt->vif_table[out_vifi]; + struct vif_device *in_vif = &mrt->vif_table[in_vifi]; + + if (!skb->offload_mr_fwd_mark) + return false; + if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) + return false; + return netdev_phys_item_id_same(&out_vif->dev_parent_id, + &in_vif->dev_parent_id); +} +#else +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + return false; +} +#endif + /* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *c, int vifi) + int in_vifi, struct sk_buff *skb, + struct mfc_cache *c, int vifi) { const struct iphdr *iph 
= ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; @@ -1883,6 +1906,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } + if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) + goto out_free; + if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, @@ -2060,8 +2086,8 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, - psend); + ipmr_queue_xmit(net, mrt, true_vifi, + skb2, cache, psend); } psend = ct; } @@ -2072,9 +2098,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb2, + cache, psend); } else { - ipmr_queue_xmit(net, mrt, skb, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); return; } } From 267872435515185e2e600a721fdddeea90f96ffa Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:09 +0200 Subject: [PATCH 4/7] mlxsw: acl: Introduce ACL trap and forward action Use trap/discard flex action to implement trap and forward. The action will later be used for multicast routing, as the multicast routing mechanism is done using ACL flexible actions in Spectrum hardware. Using that action, it will be possible to implement a trap-and-forward route. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/core_acl_flex_actions.c | 17 +++++++++++++++++ .../mellanox/mlxsw/core_acl_flex_actions.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index bc55d0e76705..6a979a09ab72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -676,6 +676,7 @@ enum mlxsw_afa_trapdisc_trap_action { MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); enum mlxsw_afa_trapdisc_forward_action { + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, }; @@ -729,6 +730,22 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, + trap_id); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 06b0be432b8f..a8d3314c3a24 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -61,6 +61,8 @@ int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 
trap_id); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, From a0040c8c935548e1efb1a28f07f15d7ec7918055 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:10 +0200 Subject: [PATCH 5/7] mlxsw: spectrum: Add trap for multicast trap-and-forward routes When a multicast route is configured with trap-and-forward action, the packets should be marked with skb->offload_mr_fwd_mark, in order to prevent the packets from being forwarded again by the kernel ipmr module. Due to this, it is not possible to use the already existing multicast trap (MLXSW_TRAP_ID_ACL1) as the packet should be marked differently. Add the MLXSW_TRAP_ID_ACL2 which is for trap-and-forward multicast routes, and set the offload_mr_fwd_mark skb field in its handler. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++++++++++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e9b94430afed..3adf237c951a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3312,6 +3312,14 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } +static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) +{ + skb->offload_mr_fwd_mark = 1; + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); +} + static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port, void *priv) { @@ -3355,6 +3363,10 @@ static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port, MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) +#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + #define MLXSW_SP_EVENTL(_func, _trap_id) \ MLXSW_EVENTL(_func, _trap_id, SP_EVENT) @@ -3425,6 +3437,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index a98103539f6b..ec6cef8267ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ 
-93,6 +93,8 @@ enum { MLXSW_TRAP_ID_ACL0 = 0x1C0, /* Multicast trap used for routes with trap action */ MLXSW_TRAP_ID_ACL1 = 0x1C1, + /* Multicast trap used for routes with trap-and-forward action */ + MLXSW_TRAP_ID_ACL2 = 0x1C2, MLXSW_TRAP_ID_MAX = 0x1FF }; From 607feadef89ac806df5a0be983afef77247e1541 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:11 +0200 Subject: [PATCH 6/7] mlxsw: spectrum: mr_tcam: Add trap-and-forward multicast route In addition to the current multicast route actions, which include a trap route action and a forward route action, add the trap-and-forward multicast route action, and implement it in the multicast routing hardware logic. To implement that, add a trap-and-forward ACL action as the last action in the route flexible action set. The used trap is the ACL2 trap, which marks the packets with offload_mr_fwd_mark, to prevent the packet from being forwarded again by the kernel. Note: At this stage, the offloading logic does not support trap-and-forward multicast routes. This patch adds the support only in the hardware logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h index c851b237d253..5d26a122af49 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -42,6 +42,7 @@ enum mlxsw_sp_mr_route_action { MLXSW_SP_MR_ROUTE_ACTION_FORWARD, MLXSW_SP_MR_ROUTE_ACTION_TRAP, + MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD, }; enum mlxsw_sp_mr_route_prio { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index cda9e9ad10e3..3ffb28dd4057 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -253,6 +253,7 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err; break; + case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD: case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: /* If we are about to append a multicast router action, commit * the erif_list. @@ -266,6 +267,13 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, erif_list->kvdl_index); if (err) goto err; + + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) { + err = mlxsw_afa_block_append_trap_and_forward(afa_block, + MLXSW_TRAP_ID_ACL2); + if (err) + goto err; + } break; default: err = -EINVAL; From f60c254998de80feaec8e4122960ab64e8045214 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:12 +0200 Subject: [PATCH 7/7] mlxsw: spectrum: mr: Support trap-and-forward routes Add the support of trap-and-forward route action in the multicast routing offloading logic. A route will be set to trap-and-forward action if one (or more) of its output interfaces is not offload-able, i.e. does not have a valid Spectrum RIF. 
This way, a route with mixed output VIFs list, which contains both offload-able and un-offload-able devices can go through partial offloading in hardware, and the rest will be done in the kernel ipmr module. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_mr.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 4aaf6ca1be7c..1f84bb8e9135 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -114,9 +114,9 @@ static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif; } -static bool mlxsw_sp_mr_vif_rif_invalid(const struct mlxsw_sp_mr_vif *vif) +static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif) { - return mlxsw_sp_mr_vif_regular(vif) && vif->dev && !vif->rif; + return vif->dev; } static bool @@ -182,14 +182,13 @@ mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) return MLXSW_SP_MR_ROUTE_ACTION_TRAP; - /* If either one of the eVIFs is not regular (VIF of type pimreg or - * tunnel) or one of the VIFs has no matching RIF, trap the packet. + /* If one of the eVIFs has no RIF, trap-and-forward the route as there + * is some more routing to do in software too. */ - list_for_each_entry(rve, &mr_route->evif_list, route_node) { - if (!mlxsw_sp_mr_vif_regular(rve->mr_vif) || - mlxsw_sp_mr_vif_rif_invalid(rve->mr_vif)) - return MLXSW_SP_MR_ROUTE_ACTION_TRAP; - } + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD; + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; }