mpls: allow TTL propagation to IP packets to be configured
Provide the ability to control, on a per-route basis, whether the TTL value from an MPLS packet is propagated to an IPv4/IPv6 packet when the last label is popped, as per the theoretical model in RFC 3443. This is done through a new route attribute, RTA_TTL_PROPAGATE, which can be 0 to mean disable propagation and 1 to mean enable propagation. In order to provide the ability to change the behaviour for packets arriving with IPv4/IPv6 Explicit Null labels and to provide an easy way for a user to change the behaviour for all existing routes without having to reprogram them, a global knob is provided. This is done through the addition of a new per-namespace sysctl, "net.mpls.ip_ttl_propagate", which defaults to enabled. If the per-route attribute is set (either enabled or disabled) then it overrides the global configuration. Signed-off-by: Robert Shearman <rshearma@brocade.com> Acked-by: David Ahern <dsa@cumulusnetworks.com> Tested-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
b66239b682
commit
5b441ac878
|
@ -19,6 +19,17 @@ platform_labels - INTEGER
|
|||
Possible values: 0 - 1048575
|
||||
Default: 0
|
||||
|
||||
ip_ttl_propagate - BOOL
|
||||
Control whether TTL is propagated from the IPv4/IPv6 header to
|
||||
the MPLS header on imposing labels and propagated from the
|
||||
MPLS header to the IPv4/IPv6 header on popping the last label.
|
||||
|
||||
If disabled, the MPLS transport network will appear as a
|
||||
single hop to transit traffic.
|
||||
|
||||
0 - disabled / RFC 3443 [Short] Pipe Model
|
||||
1 - enabled / RFC 3443 Uniform Model (default)
|
||||
|
||||
conf/<interface>/input - BOOL
|
||||
Control whether packets can be input on this interface.
|
||||
|
||||
|
|
|
@ -9,8 +9,10 @@ struct mpls_route;
|
|||
struct ctl_table_header;
|
||||
|
||||
struct netns_mpls { /* per-network-namespace MPLS state, reached as net->mpls */
|
||||
int ip_ttl_propagate; /* sysctl "ip_ttl_propagate" (0 or 1); mpls_net_init() sets it to 1 */
|
||||
size_t platform_labels; /* initialised to 0 in mpls_net_init() */
|
||||
struct mpls_route __rcu * __rcu *platform_label; /* initialised to NULL in mpls_net_init() */
|
||||
|
||||
struct ctl_table_header *ctl; /* handle returned by register_net_sysctl() for "net/mpls" */
|
||||
};
|
||||
|
||||
|
|
|
@ -319,6 +319,7 @@ enum rtattr_type_t {
|
|||
RTA_EXPIRES,
|
||||
RTA_PAD,
|
||||
RTA_UID,
|
||||
RTA_TTL_PROPAGATE,
|
||||
__RTA_MAX
|
||||
};
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
|
||||
|
||||
static int zero = 0;
|
||||
static int one = 1;
|
||||
static int label_limit = (1 << 20) - 1;
|
||||
|
||||
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
|
||||
|
@ -220,8 +221,8 @@ static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
|
|||
return &rt->rt_nh[nh_index];
|
||||
}
|
||||
|
||||
static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
|
||||
struct mpls_entry_decoded dec)
|
||||
static bool mpls_egress(struct net *net, struct mpls_route *rt,
|
||||
struct sk_buff *skb, struct mpls_entry_decoded dec)
|
||||
{
|
||||
enum mpls_payload_type payload_type;
|
||||
bool success = false;
|
||||
|
@ -246,22 +247,46 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
|
|||
switch (payload_type) {
|
||||
case MPT_IPV4: {
|
||||
struct iphdr *hdr4 = ip_hdr(skb);
|
||||
u8 new_ttl;
|
||||
skb->protocol = htons(ETH_P_IP);
|
||||
|
||||
/* If propagating TTL, take the decremented TTL from
|
||||
* the incoming MPLS header, otherwise decrement the
|
||||
* TTL, but only if not 0 to avoid underflow.
|
||||
*/
|
||||
if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
|
||||
(rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
|
||||
net->mpls.ip_ttl_propagate))
|
||||
new_ttl = dec.ttl;
|
||||
else
|
||||
new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;
|
||||
|
||||
csum_replace2(&hdr4->check,
|
||||
htons(hdr4->ttl << 8),
|
||||
htons(dec.ttl << 8));
|
||||
hdr4->ttl = dec.ttl;
|
||||
htons(new_ttl << 8));
|
||||
hdr4->ttl = new_ttl;
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
case MPT_IPV6: {
|
||||
struct ipv6hdr *hdr6 = ipv6_hdr(skb);
|
||||
skb->protocol = htons(ETH_P_IPV6);
|
||||
hdr6->hop_limit = dec.ttl;
|
||||
|
||||
/* If propagating TTL, take the decremented TTL from
|
||||
* the incoming MPLS header, otherwise decrement the
|
||||
* hop limit, but only if not 0 to avoid underflow.
|
||||
*/
|
||||
if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
|
||||
(rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
|
||||
net->mpls.ip_ttl_propagate))
|
||||
hdr6->hop_limit = dec.ttl;
|
||||
else if (hdr6->hop_limit)
|
||||
hdr6->hop_limit = hdr6->hop_limit - 1;
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
case MPT_UNSPEC:
|
||||
/* Should have decided which protocol it is by now */
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -361,7 +386,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
|
|||
|
||||
if (unlikely(!new_header_size && dec.bos)) {
|
||||
/* Penultimate hop popping */
|
||||
if (!mpls_egress(rt, skb, dec))
|
||||
if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
|
||||
goto err;
|
||||
} else {
|
||||
bool bos;
|
||||
|
@ -412,6 +437,7 @@ static struct packet_type mpls_packet_type __read_mostly = {
|
|||
/* Netlink attribute policy for MPLS route messages: expected payload type
 * per RTA_* attribute. NOTE(review): the validating caller is not visible in
 * this hunk - confirm where the policy is applied.
 */
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
|
||||
[RTA_DST] = { .type = NLA_U32 },
|
||||
[RTA_OIF] = { .type = NLA_U32 },
|
||||
[RTA_TTL_PROPAGATE] = { .type = NLA_U8 }, /* rtm_to_route_config() additionally rejects values > 1 */
|
||||
};
|
||||
|
||||
struct mpls_route_config {
|
||||
|
@ -421,6 +447,7 @@ struct mpls_route_config {
|
|||
u8 rc_via_alen;
|
||||
u8 rc_via[MAX_VIA_ALEN];
|
||||
u32 rc_label;
|
||||
u8 rc_ttl_propagate;
|
||||
u8 rc_output_labels;
|
||||
u32 rc_output_label[MAX_NEW_LABELS];
|
||||
u32 rc_nlflags;
|
||||
|
@ -856,6 +883,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
|
|||
|
||||
rt->rt_protocol = cfg->rc_protocol;
|
||||
rt->rt_payload_type = cfg->rc_payload_type;
|
||||
rt->rt_ttl_propagate = cfg->rc_ttl_propagate;
|
||||
|
||||
if (cfg->rc_mp)
|
||||
err = mpls_nh_build_multi(cfg, rt);
|
||||
|
@ -1576,6 +1604,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|||
cfg->rc_label = LABEL_NOT_SPECIFIED;
|
||||
cfg->rc_protocol = rtm->rtm_protocol;
|
||||
cfg->rc_via_table = MPLS_NEIGH_TABLE_UNSPEC;
|
||||
cfg->rc_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
|
||||
cfg->rc_nlflags = nlh->nlmsg_flags;
|
||||
cfg->rc_nlinfo.portid = NETLINK_CB(skb).portid;
|
||||
cfg->rc_nlinfo.nlh = nlh;
|
||||
|
@ -1622,6 +1651,17 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|||
cfg->rc_mp_len = nla_len(nla);
|
||||
break;
|
||||
}
|
||||
case RTA_TTL_PROPAGATE:
|
||||
{
|
||||
u8 ttl_propagate = nla_get_u8(nla);
|
||||
|
||||
if (ttl_propagate > 1)
|
||||
goto errout;
|
||||
cfg->rc_ttl_propagate = ttl_propagate ?
|
||||
MPLS_TTL_PROP_ENABLED :
|
||||
MPLS_TTL_PROP_DISABLED;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
/* Unsupported attribute */
|
||||
goto errout;
|
||||
|
@ -1682,6 +1722,15 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
|
|||
|
||||
if (nla_put_labels(skb, RTA_DST, 1, &label))
|
||||
goto nla_put_failure;
|
||||
|
||||
if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
|
||||
bool ttl_propagate =
|
||||
rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;
|
||||
|
||||
if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
|
||||
ttl_propagate))
|
||||
goto nla_put_failure;
|
||||
}
|
||||
if (rt->rt_nhn == 1) {
|
||||
const struct mpls_nh *nh = rt->rt_nh;
|
||||
|
||||
|
@ -1792,7 +1841,8 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
|
|||
{
|
||||
size_t payload =
|
||||
NLMSG_ALIGN(sizeof(struct rtmsg))
|
||||
+ nla_total_size(4); /* RTA_DST */
|
||||
+ nla_total_size(4) /* RTA_DST */
|
||||
+ nla_total_size(1); /* RTA_TTL_PROPAGATE */
|
||||
|
||||
if (rt->rt_nhn == 1) {
|
||||
struct mpls_nh *nh = rt->rt_nh;
|
||||
|
@ -1876,6 +1926,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
|
|||
RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
|
||||
rt0->rt_protocol = RTPROT_KERNEL;
|
||||
rt0->rt_payload_type = MPT_IPV4;
|
||||
rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
|
||||
rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
|
||||
rt0->rt_nh->nh_via_alen = lo->addr_len;
|
||||
memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
|
||||
|
@ -1889,6 +1940,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
|
|||
RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
|
||||
rt2->rt_protocol = RTPROT_KERNEL;
|
||||
rt2->rt_payload_type = MPT_IPV6;
|
||||
rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
|
||||
rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
|
||||
rt2->rt_nh->nh_via_alen = lo->addr_len;
|
||||
memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
|
||||
|
@ -1970,6 +2022,9 @@ static int mpls_platform_labels(struct ctl_table *table, int write,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Offset of @field within struct net, expressed as a pointer so that it can
 * be stored in a ctl_table .data member of the static template table.
 * mpls_net_init() later adds the address of the real struct net to each
 * entry, turning the offset into an absolute pointer.
 */
#define MPLS_NS_SYSCTL_OFFSET(field) \
|
||||
(&((struct net *)0)->field)
|
||||
|
||||
static const struct ctl_table mpls_table[] = {
|
||||
{
|
||||
.procname = "platform_labels",
|
||||
|
@ -1978,21 +2033,37 @@ static const struct ctl_table mpls_table[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = mpls_platform_labels,
|
||||
},
|
||||
{
|
||||
.procname = "ip_ttl_propagate",
|
||||
.data = MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
{ }
|
||||
};
|
||||
|
||||
static int mpls_net_init(struct net *net)
|
||||
{
|
||||
struct ctl_table *table;
|
||||
int i;
|
||||
|
||||
net->mpls.platform_labels = 0;
|
||||
net->mpls.platform_label = NULL;
|
||||
net->mpls.ip_ttl_propagate = 1;
|
||||
|
||||
table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
|
||||
if (table == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
table[0].data = net;
|
||||
/* Table data contains only offsets relative to the base of
|
||||
* struct net at this point, so make them absolute.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(mpls_table) - 1; i++)
|
||||
table[i].data = (char *)net + (uintptr_t)table[i].data;
|
||||
|
||||
net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
|
||||
if (net->mpls.ctl == NULL) {
|
||||
kfree(table);
|
||||
|
|
|
@ -90,6 +90,12 @@ struct mpls_nh { /* next hop label forwarding entry */
|
|||
u8 nh_via_table;
|
||||
};
|
||||
|
||||
/* Per-route TTL propagation setting, stored in mpls_route.rt_ttl_propagate
 * and consulted by mpls_egress() when the last label is popped.
 */
enum mpls_ttl_propagation {
|
||||
MPLS_TTL_PROP_DEFAULT, /* no per-route override: follow net->mpls.ip_ttl_propagate */
|
||||
MPLS_TTL_PROP_ENABLED, /* always copy the MPLS TTL into the IP TTL / hop limit */
|
||||
MPLS_TTL_PROP_DISABLED, /* never propagate: the IP TTL / hop limit is decremented by one instead, unless already 0 */
|
||||
};
|
||||
|
||||
/* The route, nexthops and vias are stored together in the same memory
|
||||
* block:
|
||||
*
|
||||
|
@ -116,6 +122,7 @@ struct mpls_route { /* next hop label forwarding entry */
|
|||
u8 rt_protocol;
|
||||
u8 rt_payload_type;
|
||||
u8 rt_max_alen;
|
||||
u8 rt_ttl_propagate;
|
||||
unsigned int rt_nhn;
|
||||
unsigned int rt_nhn_alive;
|
||||
struct mpls_nh rt_nh[0];
|
||||
|
|
Loading…
Reference in New Issue