From 1b5c5493e3e68181be344cb51bf9df192d05ffc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:50 -0700 Subject: [PATCH 01/10] net_sched: add the ability to defer skb freeing qdisc are changed under RTNL protection and often while blocking BH and root qdisc spinlock. When lots of skbs need to be dropped, we free them under these locks causing TX/RX freezes, and more generally latency spikes. This commit adds rtnl_kfree_skbs(), used to queue skbs for deferred freeing. Actual freeing happens right after RTNL is released, with appropriate scheduling points. rtnl_qdisc_drop() can also be used in place of disc_drop() when RTNL is held. qdisc_reset_queue() and __qdisc_reset_queue() get the new behavior, so standard qdiscs like pfifo, pfifo_fast... have their ->reset() method automatically handled. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 5 +++-- include/net/sch_generic.h | 16 ++++++++++++---- net/core/rtnetlink.c | 22 ++++++++++++++++++++++ net/sched/sch_generic.c | 2 +- 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c006cc900c44..2daece8979f7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -89,8 +89,9 @@ void net_inc_egress_queue(void); void net_dec_egress_queue(void); #endif -extern void rtnetlink_init(void); -extern void __rtnl_unlock(void); +void rtnetlink_init(void); +void __rtnl_unlock(void); +void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); #define ASSERT_RTNL() do { \ if (unlikely(!rtnl_is_locked())) { \ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9a0d177884c6..4f7cee8344c4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -683,19 +683,21 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct Qdisc *sch, - struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct sk_buff_head *list) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - __skb_queue_purge(list); + if (!skb_queue_empty(list)) { + rtnl_kfree_skbs(list->next, list->prev); + __skb_queue_head_init(list); + } } static inline void qdisc_reset_queue(struct Qdisc *sch) { - __qdisc_reset_queue(sch, &sch->q); + __qdisc_reset_queue(&sch->q); sch->qstats.backlog = 0; } @@ -716,6 +718,12 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, return old; } +static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) +{ + rtnl_kfree_skbs(skb, skb); + qdisc_qstats_drop(sch); +} + static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) { kfree_skb(skb); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d69c4644f8f2..eb49ca24274a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -71,9 +71,31 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +static struct sk_buff *defer_kfree_skb_list; +void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail) +{ + if (head && tail) { + tail->next = defer_kfree_skb_list; + defer_kfree_skb_list = head; + } +} +EXPORT_SYMBOL(rtnl_kfree_skbs); + void __rtnl_unlock(void) { + struct sk_buff *head = defer_kfree_skb_list; + + defer_kfree_skb_list = NULL; + mutex_unlock(&rtnl_mutex); + + while (head) { + struct sk_buff *next = head->next; + + kfree_skb(head); + cond_resched(); + head = next; + } } void rtnl_unlock(void) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0c9cb516f2e3..773b632e1e33 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -493,7 +493,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __qdisc_reset_queue(qdisc, band2list(priv, prio)); + __qdisc_reset_queue(band2list(priv, prio)); priv->bitmap = 0; qdisc->qstats.backlog = 0; From f9aed311b682ca933ca78821cce3e4f1d69d4f56 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:51 -0700 Subject: [PATCH 02/10] net_sched: sch_choke: defer skb freeing choke_reset() and choke_change() can use rtnl_qdisc_drop() to defer expensive skb freeing after locks are released. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_choke.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 04e0b0583e00..789b69ee9e51 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -375,11 +375,11 @@ static void choke_reset(struct Qdisc *sch) q->head = (q->head + 1) & q->tab_mask; if (!skb) continue; - qdisc_qstats_backlog_dec(sch, skb); - --sch->q.qlen; - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } + sch->q.qlen = 0; + sch->qstats.backlog = 0; memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; red_restart(&q->vars); @@ -455,7 +455,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt) dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); --sch->q.qlen; - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped); q->head = 0; From b3d7e2b29b226c986cbd4efcaf43ab3ff90e6fdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:52 -0700 Subject: [PATCH 03/10] net_sched: sch_codel: defer skb freeing in codel_change() codel_change() can use rtnl_qdisc_drop() to defer expensive skb freeing after locks are released. codel_reset() already has support for deferred skb freeing because it uses qdisc_reset_queue() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index dddf3bb65a32..c5bc424e3b3c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -174,7 +174,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); From e14ffdfdd67a9eba36bd76af0df1f2ac363ae906 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:53 -0700 Subject: [PATCH 04/10] net_sched: sch_fq: defer skb freeing Both fq_change() and fq_reset() can use rtnl_kfree_skbs() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f49c81e91acd..6eb06674f778 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -514,17 +514,25 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) return skb; } +static void fq_flow_purge(struct fq_flow *flow) +{ + rtnl_kfree_skbs(flow->head, flow->tail); + flow->head = NULL; + flow->qlen = 0; +} + static void fq_reset(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *root; - struct sk_buff *skb; struct rb_node *p; struct fq_flow *f; unsigned int idx; - while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) - kfree_skb(skb); + sch->q.qlen = 0; + sch->qstats.backlog = 0; + + fq_flow_purge(&q->internal); if (!q->fq_root) return; @@ -535,8 +543,7 @@ static void fq_reset(struct Qdisc *sch) f = container_of(p, struct fq_flow, fq_node); rb_erase(p, root); - while ((skb = fq_dequeue_head(sch, f)) != NULL) - kfree_skb(skb); + fq_flow_purge(f); kmem_cache_free(fq_flow_cachep, f); } @@ -737,7 +744,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (!skb) break; drop_len += qdisc_pkt_len(skb); - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); drop_count++; } qdisc_tree_reduce_backlog(sch, drop_count, drop_len); From ece5d4c723b69a4aa0cd1f545ebbdc3a37c80dc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:54 -0700 Subject: [PATCH 05/10] net_sched: fq_codel: defer skb freeing Both fq_codel_change() and fq_codel_reset() can use rtnl_kfree_skbs() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a302e8ef5498..2dc0a849515a 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -336,6 +336,12 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) return skb; } +static void fq_codel_flow_purge(struct fq_codel_flow *flow) +{ + rtnl_kfree_skbs(flow->head, flow->tail); + flow->head = NULL; +} + static void fq_codel_reset(struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); @@ -346,18 +352,13 @@ static void fq_codel_reset(struct Qdisc *sch) for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; - while (flow->head) { - struct sk_buff *skb = dequeue_head(flow); - - qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); - } - + fq_codel_flow_purge(flow); INIT_LIST_HEAD(&flow->flowchain); codel_vars_init(&flow->cvars); } memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); sch->q.qlen = 0; + sch->qstats.backlog = 0; q->memory_usage = 0; } @@ -433,7 +434,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) struct sk_buff *skb = fq_codel_dequeue(sch); q->cstats.drop_len += qdisc_pkt_len(skb); - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); q->cstats.drop_count++; } qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len); From e7e424cdc4b2fcd7507b71d3a931708d11d5a61e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:55 -0700 Subject: [PATCH 06/10] net_sched: sch_hhf: defer skb freeing Both hhf_reset() and hhf_change() can use rtnl_kfree_skbs() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_hhf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index c51791848a38..c44593b8e65a 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -464,7 +464,7 @@ static void hhf_reset(struct Qdisc *sch) struct sk_buff *skb; while ((skb = hhf_dequeue(sch)) != NULL) - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } static void *hhf_zalloc(size_t sz) @@ -574,7 +574,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = hhf_dequeue(sch); - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, prev_backlog - sch->qstats.backlog); From a5a9f5346fb95c00d47e8b4648f6d5eada147cd4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:56 -0700 Subject: [PATCH 07/10] net_sched: sch_htb: defer skb freeing Both htb_reset() and htb_destroy() can use __qdisc_reset_queue() instead of __skb_queue_purge() to defer skb freeing of internal queues. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 07dcd2933f01..a454605ab5cb 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -957,7 +957,7 @@ static void htb_reset(struct Qdisc *sch) } } qdisc_watchdog_cancel(&q->watchdog); - __skb_queue_purge(&q->direct_queue); + __qdisc_reset_queue(&q->direct_queue); sch->q.qlen = 0; sch->qstats.backlog = 0; memset(q->hlevel, 0, sizeof(q->hlevel)); @@ -1231,7 +1231,7 @@ static void htb_destroy(struct Qdisc *sch) htb_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->direct_queue); + __qdisc_reset_queue(&q->direct_queue); } static int htb_delete(struct Qdisc *sch, unsigned long arg) From 2f08a9a16288b60df3ddfe97c965427ce0163297 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:57 -0700 Subject: [PATCH 08/10] net_sched: sch_netem: defer skb freeing rtnl_kfree_skbs() can be used in tfifo_reset() It would be nice if we could iterate through rb tree instead of removing one skb at a time, and build a single skb chain. But this is left for a future patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 876df13c745a..e271967439bf 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -368,9 +368,7 @@ static void tfifo_reset(struct Qdisc *sch) struct sk_buff *skb = netem_rb_to_skb(p); rb_erase(p, &q->t_root); - skb->next = NULL; - skb->prev = NULL; - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } } From db4879d93c351cb978db1eb4c963f44d267d63a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:58 -0700 Subject: [PATCH 09/10] net_sched: sch_pie: defer skb freeing pie_change() can use rtnl_qdisc_drop() to benefit from deferred freeing. pie_reset() is already using qdisc_reset_queue() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 71ae3b9629f9..912a46a5d02e 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -234,7 +234,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); - qdisc_drop(skb, sch); + rtnl_qdisc_drop(skb, sch); } qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); From fea024784f588a1c50e7718d6053697ebdcc033e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 20:21:59 -0700 Subject: [PATCH 10/10] net_sched: sch_fq: defer skb freeing sfq_reset() can use rtnl_kfree_skbs() instead of kfree_skb() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index a2e0b855d1c8..57d118b41cad 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -520,7 +520,7 @@ sfq_reset(struct Qdisc *sch) struct sk_buff *skb; while ((skb = sfq_dequeue(sch)) != NULL) - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } /*