From 3c645621b79828be7a46fb2694eb423b343b4bbe Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 25 Aug 2015 20:06:31 -0700 Subject: [PATCH 1/5] net_sched: make tcf_hash_destroy() static tcf_hash_destroy() used once. Make it static. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/act_api.h | 1 - net/sched/act_api.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 4519c81304bd..9d446f136607 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -111,7 +111,6 @@ struct tc_action_ops { }; int tcf_hash_search(struct tc_action *a, u32 index); -void tcf_hash_destroy(struct tc_action *a); u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo); int tcf_hash_check(u32 index, struct tc_action *a, int bind); int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index b087087ccfa9..06e7c4a37245 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -36,7 +36,7 @@ static void free_tcf(struct rcu_head *head) kfree(p); } -void tcf_hash_destroy(struct tc_action *a) +static void tcf_hash_destroy(struct tc_action *a) { struct tcf_common *p = a->priv; struct tcf_hashinfo *hinfo = a->ops->hinfo; @@ -52,7 +52,6 @@ void tcf_hash_destroy(struct tc_action *a) */ call_rcu(&p->tcfc_rcu, free_tcf); } -EXPORT_SYMBOL(tcf_hash_destroy); int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { From faa54be4c78da6aa2148c539659867afdecd3e8d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 25 Aug 2015 20:06:32 -0700 Subject: [PATCH 2/5] net_sched: act_bpf: remove unnecessary copy Fix harmless typo and avoid unnecessary copy of empty 'prog' into unused 'strcut tcf_bpf_cfg old'. Fixes: f4eaed28c783 ("act_bpf: fix memory leaks when replacing bpf programs") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 1b97dabc621a..458cf647e698 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -327,7 +327,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); - if (ret != ACT_P_CREATED) + if (res != ACT_P_CREATED) tcf_bpf_prog_fill_cfg(prog, &old); prog->bpf_ops = cfg.bpf_ops; From ed7aa879ce1a0ff6468c5a6d3eb4e31169b95c4a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 25 Aug 2015 20:06:33 -0700 Subject: [PATCH 3/5] net_sched: convert tcindex to call tcf_exts_destroy from rcu callback Adjust destroy path of cls_tcindex to call tcf_exts_destroy() after rcu grace period. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index a557dbaf5afe..944c8ff45055 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -27,6 +27,7 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; + struct rcu_head rcu; }; struct tcindex_filter { @@ -133,8 +134,23 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } -static int -tcindex_delete(struct tcf_proto *tp, unsigned long arg) +static void tcindex_destroy_rexts(struct rcu_head *head) +{ + struct tcindex_filter_result *r; + + r = container_of(head, struct tcindex_filter_result, rcu); + tcf_exts_destroy(&r->exts); +} + +static void tcindex_destroy_fexts(struct rcu_head *head) +{ + struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); + + tcf_exts_destroy(&f->result.exts); + kfree(f); +} + +static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; @@ -162,9 +178,14 @@ tcindex_delete(struct tcf_proto *tp, unsigned long arg) rcu_assign_pointer(*walk, rtnl_dereference(f->next)); } tcf_unbind_filter(tp, &r->res); - tcf_exts_destroy(&r->exts); + /* all classifiers are required to call tcf_exts_destroy() after rcu + * grace period, since converted-to-rcu actions are relying on that + * in cleanup() callback + */ if (f) - kfree_rcu(f, rcu); + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + call_rcu(&r->rcu, tcindex_destroy_rexts); return 0; } From 9e528d89154b602209fda2fb92e89927bbffc976 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 25 Aug 2015 20:06:34 -0700 Subject: [PATCH 4/5] net_sched: convert rsvp to call tcf_exts_destroy from rcu callback Adjust destroy path of cls_rsvp to call tcf_exts_destroy() after rcu grace period. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 02fa82792dab..f9c9fc075fe6 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -283,12 +283,22 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } -static void -rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) +static void rsvp_delete_filter_rcu(struct rcu_head *head) +{ + struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); + + tcf_exts_destroy(&f->exts); + kfree(f); +} + +static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) { tcf_unbind_filter(tp, &f->res); - tcf_exts_destroy(&f->exts); - kfree_rcu(f, rcu); + /* all classifiers are required to call tcf_exts_destroy() after rcu + * grace period, since converted-to-rcu actions are relying on that + * in cleanup() callback + */ + call_rcu(&f->rcu, rsvp_delete_filter_rcu); } static bool rsvp_destroy(struct tcf_proto *tp, bool force) From cff82457c5584f6a96d2b85d1a88b81ba304a330 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 25 Aug 2015 20:06:35 -0700 Subject: [PATCH 5/5] net_sched: act_bpf: remove spinlock in fast path Similar to act_gact/act_mirred, act_bpf can be lockless in packet processing with extra care taken to free bpf programs after rcu grace period. Replacement of existing act_bpf (very rare) is done with synchronize_rcu() and final destruction is done from tc_action_ops->cleanup() callback that is called from tcf_exts_destroy()->tcf_action_destroy()->__tcf_hash_release() when bind and refcnt reach zero which is only possible when classifier is destroyed. Previous two patches fixed the last two classifiers (tcindex and rsvp) to call tcf_exts_destroy() from rcu callback. Similar to gact/mirred there is a race between prog->filter and prog->tcf_action. Meaning that the program being replaced may use previous default action if it happened to return TC_ACT_UNSPEC. act_mirred race betwen tcf_action and tcfm_dev is similar. In all cases the race is harmless. Long term we may want to improve the situation by replacing the whole tc_action->priv as single pointer instead of updating inner fields one by one. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/tc_act/tc_bpf.h | 2 +- net/sched/act_bpf.c | 36 +++++++++++++++++++----------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h index a152e9858b2c..958d69cfb19c 100644 --- a/include/net/tc_act/tc_bpf.h +++ b/include/net/tc_act/tc_bpf.h @@ -15,7 +15,7 @@ struct tcf_bpf { struct tcf_common common; - struct bpf_prog *filter; + struct bpf_prog __rcu *filter; union { u32 bpf_fd; u16 bpf_num_ops; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 458cf647e698..559bfa011bda 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { struct tcf_bpf *prog = act->priv; + struct bpf_prog *filter; int action, filter_res; bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; if (unlikely(!skb_mac_header_was_set(skb))) return TC_ACT_UNSPEC; - spin_lock(&prog->tcf_lock); + tcf_lastuse_update(&prog->tcf_tm); + bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); - prog->tcf_tm.lastuse = jiffies; - bstats_update(&prog->tcf_bstats, skb); - - /* Needed here for accessing maps. */ rcu_read_lock(); + filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - filter_res = BPF_PROG_RUN(prog->filter, skb); + filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { - filter_res = BPF_PROG_RUN(prog->filter, skb); + filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); @@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, break; case TC_ACT_SHOT: action = filter_res; - prog->tcf_qstats.drops++; + qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats)); break; case TC_ACT_UNSPEC: action = prog->tcf_action; @@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, break; } - spin_unlock(&prog->tcf_lock); return action; } @@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, struct tcf_bpf_cfg *cfg) { cfg->is_ebpf = tcf_bpf_is_ebpf(prog); - cfg->filter = prog->filter; + /* updates to prog->filter are prevented, since it's called either + * with rtnl lock or during final cleanup in rcu callback + */ + cfg->filter = rcu_dereference_protected(prog->filter, 1); cfg->bpf_ops = prog->bpf_ops; cfg->bpf_name = prog->bpf_name; @@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (!tcf_hash_check(parm->index, act, bind)) { ret = tcf_hash_create(parm->index, est, act, - sizeof(*prog), bind, false); + sizeof(*prog), bind, true); if (ret < 0) return ret; @@ -325,7 +326,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, goto out; prog = to_bpf(act); - spin_lock_bh(&prog->tcf_lock); + ASSERT_RTNL(); if (res != ACT_P_CREATED) tcf_bpf_prog_fill_cfg(prog, &old); @@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog->bpf_fd = cfg.bpf_fd; prog->tcf_action = parm->action; - prog->filter = cfg.filter; + rcu_assign_pointer(prog->filter, cfg.filter); - spin_unlock_bh(&prog->tcf_lock); - - if (res == ACT_P_CREATED) + if (res == ACT_P_CREATED) { tcf_hash_insert(act); - else + } else { + /* make sure the program being replaced is no longer executing */ + synchronize_rcu(); tcf_bpf_cfg_cleanup(&old); + } return res; out: