diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 1b214ea4619a..e3e693bf0dec 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -35,6 +35,7 @@ #include #include #include +#include #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) @@ -42,6 +43,9 @@ #define DRV_NAME "vrf" #define DRV_VERSION "1.0" +#define FIB_RULE_PREF 1000 /* default preference for FIB rules */ +static bool add_fib_rules = true; + struct net_vrf { struct rtable __rcu *rth; struct rtable __rcu *rth_local; @@ -897,6 +901,91 @@ static const struct ethtool_ops vrf_ethtool_ops = { .get_drvinfo = vrf_get_drvinfo, }; +static inline size_t vrf_fib_rule_nl_size(void) +{ + size_t sz; + + sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)); + sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */ + sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */ + + return sz; +} + +static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) +{ + struct fib_rule_hdr *frh; + struct nlmsghdr *nlh; + struct sk_buff *skb; + int err; + + skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*frh), 0); + if (!nlh) + goto nla_put_failure; + + /* rule only needs to appear once */ + nlh->nlmsg_flags &= NLM_F_EXCL; + + frh = nlmsg_data(nlh); + memset(frh, 0, sizeof(*frh)); + frh->family = family; + frh->action = FR_ACT_TO_TBL; + + if (nla_put_u32(skb, FRA_L3MDEV, 1)) + goto nla_put_failure; + + if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + + /* fib_nl_{new,del}rule handling looks for net from skb->sk */ + skb->sk = dev_net(dev)->rtnl; + if (add_it) { + err = fib_nl_newrule(skb, nlh); + if (err == -EEXIST) + err = 0; + } else { + err = fib_nl_delrule(skb, nlh); + if (err == -ENOENT) + err = 0; + } + nlmsg_free(skb); + + return err; + +nla_put_failure: + nlmsg_free(skb); + + return -EMSGSIZE; +} + +static int vrf_add_fib_rules(const struct net_device *dev) +{ + int err; + + err = vrf_fib_rule(dev, AF_INET, true); + if (err < 0) + goto out_err; + + err = vrf_fib_rule(dev, AF_INET6, true); + if (err < 0) + goto ipv6_err; + + return 0; + +ipv6_err: + vrf_fib_rule(dev, AF_INET, false); + +out_err: + netdev_err(dev, "Failed to add FIB rules.\n"); + return err; +} + static void vrf_setup(struct net_device *dev) { ether_setup(dev); @@ -937,6 +1026,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_vrf *vrf = netdev_priv(dev); + int err; if (!data || !data[IFLA_VRF_TABLE]) return -EINVAL; @@ -945,7 +1035,21 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_L3MDEV_MASTER; - return register_netdevice(dev); + err = register_netdevice(dev); + if (err) + goto out; + + if (add_fib_rules) { + err = vrf_add_fib_rules(dev); + if (err) { + unregister_netdevice(dev); + goto out; + } + add_fib_rules = false; + } + +out: + return err; } static size_t vrf_nl_getsize(const struct net_device *dev) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 59160de702b6..456e4a6006ab 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -17,7 +17,8 @@ struct fib_rule { u32 flags; u32 table; u8 action; - /* 3 bytes hole, try to use */ + u8 l3mdev; + /* 2 bytes hole, try to use */ u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; @@ -36,6 +37,7 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + u32 table; int flags; #define FIB_LOOKUP_NOREF 1 #define FIB_LOOKUP_IGNORE_LINKSTATE 2 @@ -89,7 +91,8 @@ struct fib_rules_ops { [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_L3MDEV] = { .type = NLA_U8 } static inline void fib_rule_get(struct fib_rule *rule) { @@ -102,6 +105,20 @@ static inline void fib_rule_put(struct fib_rule *rule) kfree_rcu(rule, rcu); } +#ifdef CONFIG_NET_L3_MASTER_DEV +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->l3mdev ? arg->table : rule->table; +} +#else +static inline u32 fib_rule_get_table(struct fib_rule *rule, + struct fib_lookup_arg *arg) +{ + return rule->table; +} +#endif + static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) { if (nla[FRA_TABLE]) @@ -117,4 +134,7 @@ int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, struct fib_lookup_arg *); int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table, u32 flags); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh); +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh); #endif diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 374388dc01c8..34f33eb96a5e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,8 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include + /** * struct l3mdev_ops - l3mdev operations * @@ -41,6 +43,9 @@ struct l3mdev_ops { #ifdef CONFIG_NET_L3_MASTER_DEV +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -236,6 +241,13 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { return skb; } + +static inline +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg) +{ + return 1; +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 620c8a5ddc00..14404b3ebb89 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -50,6 +50,7 @@ enum { FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, FRA_PAD, + FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ __FRA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 840acebbb80c..98298b11f534 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -173,7 +173,8 @@ void fib_rules_unregister(struct fib_rules_ops *ops) EXPORT_SYMBOL_GPL(fib_rules_unregister); static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, - struct flowi *fl, int flags) + struct flowi *fl, int flags, + struct fib_lookup_arg *arg) { int ret = 0; @@ -189,6 +190,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id)) goto out; + if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -204,7 +208,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, list_for_each_entry_rcu(rule, &ops->rules_list, list) { jumped: - if (!fib_rule_match(rule, ops, fl, flags)) + if (!fib_rule_match(rule, ops, fl, flags, arg)) continue; if (rule->action == FR_ACT_GOTO) { @@ -265,7 +269,7 @@ static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, return err; } -static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -336,6 +340,14 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) if (tb[FRA_TUN_ID]) rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]); + if (tb[FRA_L3MDEV]) { +#ifdef CONFIG_NET_L3_MASTER_DEV + rule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]); + if (rule->l3mdev != 1) +#endif + goto errout_free; + } + rule->action = frh->action; rule->flags = frh->flags; rule->table = frh_get_table(frh, tb); @@ -371,6 +383,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + if (rule->l3mdev && rule->table) + goto errout_free; + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -424,8 +439,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) rules_ops_put(ops); return err; } +EXPORT_SYMBOL_GPL(fib_nl_newrule); -static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -483,6 +499,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID]))) continue; + if (tb[FRA_L3MDEV] && + (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -536,6 +556,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) rules_ops_put(ops); return err; } +EXPORT_SYMBOL_GPL(fib_nl_delrule); static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, struct fib_rule *rule) @@ -607,7 +628,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->target && nla_put_u32(skb, FRA_GOTO, rule->target)) || (rule->tun_id && - nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD))) + nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) || + (rule->l3mdev && + nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f2bda9e89c61..6e9ea69e5f75 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -76,6 +76,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, { int err = -EAGAIN; struct fib_table *tbl; + u32 tb_id; switch (rule->action) { case FR_ACT_TO_TBL: @@ -94,7 +95,8 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, rcu_read_lock(); - tbl = fib_get_table(rule->fr_net, rule->table); + tb_id = fib_rule_get_table(rule, arg); + tbl = fib_get_table(rule->fr_net, tb_id); if (tbl) err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *)arg->result, @@ -180,7 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (err) goto errout; - if (rule->table == RT_TABLE_UNSPEC) { + if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) { if (rule->action == FR_ACT_TO_TBL) { struct fib_table *table; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ed33abf57abd..5857c1fc8b67 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -67,6 +67,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, struct net *net = rule->fr_net; pol_lookup_t lookup = arg->lookup_ptr; int err = 0; + u32 tb_id; switch (rule->action) { case FR_ACT_TO_TBL: @@ -86,7 +87,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto discard_pkt; } - table = fib6_get_table(net, rule->table); + tb_id = fib_rule_get_table(rule, arg); + table = fib6_get_table(net, tb_id); if (!table) { err = -EAGAIN; goto out; @@ -199,7 +201,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct fib6_rule *rule6 = (struct fib6_rule *) rule; - if (rule->action == FR_ACT_TO_TBL) { + if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) goto errout; diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 6651a78e100c..7da97809a7e8 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -10,6 +10,7 @@ */ #include +#include #include /** @@ -160,3 +161,40 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) return rc; } EXPORT_SYMBOL_GPL(l3mdev_get_saddr); + +/** + * l3mdev_fib_rule_match - Determine if flowi references an + * L3 master device + * @net: network namespace for device index lookup + * @fl: flow struct + */ + +int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct fib_lookup_arg *arg) +{ + struct net_device *dev; + int rc = 0; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_fib_table) { + arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); + rc = 1; + goto out; + } + + dev = dev_get_by_index_rcu(net, fl->flowi_iif); + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_fib_table) { + arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); + rc = 1; + goto out; + } + +out: + rcu_read_unlock(); + + return rc; +}