2007-02-09 22:25:16 +08:00
|
|
|
/* net/sched/sch_ingress.c - Ingress qdisc
|
2005-04-17 06:20:36 +08:00
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* Authors: Jamal Hadi Salim 1999
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/types.h>
|
2007-07-03 13:49:07 +08:00
|
|
|
#include <linux/list.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/skbuff.h>
|
|
|
|
#include <linux/rtnetlink.h>
|
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).
It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.
The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.
We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.
One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.
Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-10 04:51:32 +08:00
|
|
|
|
2007-03-26 14:06:12 +08:00
|
|
|
#include <net/netlink.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <net/pkt_sched.h>
|
|
|
|
|
|
|
|
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
|
|
|
|
{
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2008-01-21 16:11:01 +08:00
|
|
|
static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
return TC_H_MIN(classid) + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned long ingress_bind_filter(struct Qdisc *sch,
|
2008-01-21 16:11:01 +08:00
|
|
|
unsigned long parent, u32 classid)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
return ingress_get(sch, classid);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ingress_put(struct Qdisc *sch, unsigned long cl)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2008-01-21 16:11:01 +08:00
|
|
|
static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2014-09-13 11:05:27 +08:00
|
|
|
static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
|
|
|
|
unsigned long cl)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).
It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.
The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.
We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.
One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.
Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-10 04:51:32 +08:00
|
|
|
struct net_device *dev = qdisc_dev(sch);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).
It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.
The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.
We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.
One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.
Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-10 04:51:32 +08:00
|
|
|
return &dev->ingress_cl_list;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
net: use jump label patching for ingress qdisc in __netif_receive_skb_core
Even if we make use of classifier and actions from the egress
path, we're going into handle_ing() executing additional code
on a per-packet cost for ingress qdisc, just to realize that
nothing is attached on ingress.
Instead, this can just be blinded out as a no-op entirely with
the use of a static key. On input fast-path, we already make
use of static keys in various places, e.g. skb time stamping,
in RPS, etc. It makes sense to not waste time when we're assured
that no ingress qdisc is attached anywhere.
Enabling/disabling of that code path is being done via two
helpers, namely net_{inc,dec}_ingress_queue(), that are being
invoked under RTNL mutex when a ingress qdisc is being either
initialized or destructed.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-11 05:07:54 +08:00
|
|
|
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
|
|
|
|
{
|
|
|
|
net_inc_ingress_queue();
|
2015-05-01 11:14:07 +08:00
|
|
|
sch->flags |= TCQ_F_CPUSTATS;
|
net: use jump label patching for ingress qdisc in __netif_receive_skb_core
Even if we make use of classifier and actions from the egress
path, we're going into handle_ing() executing additional code
on a per-packet cost for ingress qdisc, just to realize that
nothing is attached on ingress.
Instead, this can just be blinded out as a no-op entirely with
the use of a static key. On input fast-path, we already make
use of static keys in various places, e.g. skb time stamping,
in RPS, etc. It makes sense to not waste time when we're assured
that no ingress qdisc is attached anywhere.
Enabling/disabling of that code path is being done via two
helpers, namely net_{inc,dec}_ingress_queue(), that are being
invoked under RTNL mutex when a ingress qdisc is being either
initialized or destructed.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-11 05:07:54 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static void ingress_destroy(struct Qdisc *sch)
|
|
|
|
{
|
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).
It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.
The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.
We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.
One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.
Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-10 04:51:32 +08:00
|
|
|
struct net_device *dev = qdisc_dev(sch);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).
It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.
The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.
We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.
One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.
Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-10 04:51:32 +08:00
|
|
|
tcf_destroy_chain(&dev->ingress_cl_list);
|
net: use jump label patching for ingress qdisc in __netif_receive_skb_core
Even if we make use of classifier and actions from the egress
path, we're going into handle_ing() executing additional code
on a per-packet cost for ingress qdisc, just to realize that
nothing is attached on ingress.
Instead, this can just be blinded out as a no-op entirely with
the use of a static key. On input fast-path, we already make
use of static keys in various places, e.g. skb time stamping,
in RPS, etc. It makes sense to not waste time when we're assured
that no ingress qdisc is attached anywhere.
Enabling/disabling of that code path is being done via two
helpers, namely net_{inc,dec}_ingress_queue(), that are being
invoked under RTNL mutex when a ingress qdisc is being either
initialized or destructed.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-11 05:07:54 +08:00
|
|
|
net_dec_ingress_queue();
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
|
|
|
|
{
|
2008-01-24 12:34:11 +08:00
|
|
|
struct nlattr *nest;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-01-24 12:34:11 +08:00
|
|
|
nest = nla_nest_start(skb, TCA_OPTIONS);
|
|
|
|
if (nest == NULL)
|
|
|
|
goto nla_put_failure;
|
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).
It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.
The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.
We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.
One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.
Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-10 04:51:32 +08:00
|
|
|
|
2014-03-12 10:20:32 +08:00
|
|
|
return nla_nest_end(skb, nest);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2008-01-23 14:11:17 +08:00
|
|
|
nla_put_failure:
|
2008-01-24 12:34:11 +08:00
|
|
|
nla_nest_cancel(skb, nest);
|
2005-04-17 06:20:36 +08:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2007-11-14 17:44:41 +08:00
|
|
|
static const struct Qdisc_class_ops ingress_class_ops = {
|
2005-04-17 06:20:36 +08:00
|
|
|
.leaf = ingress_leaf,
|
|
|
|
.get = ingress_get,
|
|
|
|
.put = ingress_put,
|
|
|
|
.walk = ingress_walk,
|
|
|
|
.tcf_chain = ingress_find_tcf,
|
|
|
|
.bind_tcf = ingress_bind_filter,
|
|
|
|
.unbind_tcf = ingress_put,
|
|
|
|
};
|
|
|
|
|
2007-11-14 17:44:41 +08:00
|
|
|
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
|
2005-04-17 06:20:36 +08:00
|
|
|
.cl_ops = &ingress_class_ops,
|
|
|
|
.id = "ingress",
|
net: use jump label patching for ingress qdisc in __netif_receive_skb_core
Even if we make use of classifier and actions from the egress
path, we're going into handle_ing() executing additional code
on a per-packet cost for ingress qdisc, just to realize that
nothing is attached on ingress.
Instead, this can just be blinded out as a no-op entirely with
the use of a static key. On input fast-path, we already make
use of static keys in various places, e.g. skb time stamping,
in RPS, etc. It makes sense to not waste time when we're assured
that no ingress qdisc is attached anywhere.
Enabling/disabling of that code path is being done via two
helpers, namely net_{inc,dec}_ingress_queue(), that are being
invoked under RTNL mutex when a ingress qdisc is being either
initialized or destructed.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-04-11 05:07:54 +08:00
|
|
|
.init = ingress_init,
|
2005-04-17 06:20:36 +08:00
|
|
|
.destroy = ingress_destroy,
|
|
|
|
.dump = ingress_dump,
|
|
|
|
.owner = THIS_MODULE,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int __init ingress_module_init(void)
|
|
|
|
{
|
2008-01-21 16:14:05 +08:00
|
|
|
return register_qdisc(&ingress_qdisc_ops);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2008-01-21 16:11:01 +08:00
|
|
|
|
2007-02-09 22:25:16 +08:00
|
|
|
static void __exit ingress_module_exit(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
unregister_qdisc(&ingress_qdisc_ops);
|
|
|
|
}
|
2008-01-21 16:11:01 +08:00
|
|
|
|
net: sched: further simplify handle_ing
Ingress qdisc has no other purpose than calling into tc_classify()
that executes attached classifier(s) and action(s).
It has a 1:1 relationship to dev->ingress_queue. After having commit
087c1a601ad7 ("net: sched: run ingress qdisc without locks") removed
the central ingress lock, one major contention point is gone.
The extra indirection layers however, are not necessary for calling
into ingress qdisc. pktgen calling locally into netif_receive_skb()
with a dummy u32, single CPU result on a Supermicro X10SLM-F, Xeon
E3-1240: before ~21,1 Mpps, after patch ~22,9 Mpps.
We can redirect the private classifier list to the netdev directly,
without changing any classifier API bits (!) and execute on that from
handle_ing() side. The __QDISC_STATE_DEACTIVATE test can be removed,
ingress qdisc doesn't have a queue and thus dev_deactivate_queue()
is also not applicable, ingress_cl_list provides similar behaviour.
In other words, ingress qdisc acts like TCQ_F_BUILTIN qdisc.
One next possible step is the removal of the dev's ingress (dummy)
netdev_queue, and to only have the list member in the netdevice
itself.
Note, the filter chain is RCU protected and individual filter elements
are being kfree'd by sched subsystem after RCU grace period. RCU read
lock is being held by __netif_receive_skb_core().
Joint work with Alexei Starovoitov.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-10 04:51:32 +08:00
|
|
|
module_init(ingress_module_init);
|
|
|
|
module_exit(ingress_module_exit);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
MODULE_LICENSE("GPL");
|