Merge branch 'pktgen'
Jesper Dangaard Brouer says:
====================
Optimizing pktgen for single CPU performance
This series focus on optimizing "pktgen" for single CPU performance.
V2-series:
- Removed some patches
- Doc real reason for TX ring buffer filling up
NIC tuning for pktgen:
http://netoptimizer.blogspot.dk/2014/06/pktgen-for-network-overload-testing.html
General overload setup according to:
http://netoptimizer.blogspot.dk/2014/04/basic-tuning-for-network-overload.html
Hardware:
System: CPU E5-2630
NIC: Intel ixgbe/82599 chip
Testing done with net-next git tree on top of
commit 6623b41944
("Merge branch 'master' of...jkirsher/net-next")
Pktgen script exercising race condition:
https://github.com/netoptimizer/network-testing/blob/master/pktgen/unit_test01_race_add_rem_device_loop.sh
Tool for measuring LOCK overhead:
https://github.com/netoptimizer/network-testing/blob/master/src/overhead_cmpxchg.c
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
edd79ca8b3
|
@ -24,6 +24,34 @@ For monitoring and control pktgen creates:
|
|||
/proc/net/pktgen/ethX
|
||||
|
||||
|
||||
Tuning NIC for max performance
|
||||
==============================
|
||||
|
||||
The default NIC setting are (likely) not tuned for pktgen's artificial
|
||||
overload type of benchmarking, as this could hurt the normal use-case.
|
||||
|
||||
Specifically increasing the TX ring buffer in the NIC:
|
||||
# ethtool -G ethX tx 1024
|
||||
|
||||
A larger TX ring can improve pktgen's performance, while it can hurt
|
||||
in the general case, 1) because the TX ring buffer might get larger
|
||||
than the CPUs L1/L2 cache, 2) because it allow more queueing in the
|
||||
NIC HW layer (which is bad for bufferbloat).
|
||||
|
||||
One should be careful to conclude, that packets/descriptors in the HW
|
||||
TX ring cause delay. Drivers usually delay cleaning up the
|
||||
ring-buffers (for various performance reasons), thus packets stalling
|
||||
the TX ring, might just be waiting for cleanup.
|
||||
|
||||
This cleanup issues is specifically the case, for the driver ixgbe
|
||||
(Intel 82599 chip). This driver (ixgbe) combine TX+RX ring cleanups,
|
||||
and the cleanup interval is affected by the ethtool --coalesce setting
|
||||
of parameter "rx-usecs".
|
||||
|
||||
For ixgbe use e.g "30" resulting in approx 33K interrupts/sec (1/30*10^6):
|
||||
# ethtool -C ethX rx-usecs 30
|
||||
|
||||
|
||||
Viewing threads
|
||||
===============
|
||||
/proc/net/pktgen/kpktgend_0
|
||||
|
|
|
@ -69,8 +69,9 @@
|
|||
* for running devices in the if_list and sends packets until count is 0 it
|
||||
* also the thread checks the thread->control which is used for inter-process
|
||||
* communication. controlling process "posts" operations to the threads this
|
||||
* way. The if_lock should be possible to remove when add/rem_device is merged
|
||||
* into this too.
|
||||
* way.
|
||||
* The if_list is RCU protected, and the if_lock remains to protect updating
|
||||
* of if_list, from "add_device" as it invoked from userspace (via proc write).
|
||||
*
|
||||
* By design there should only be *one* "controlling" process. In practice
|
||||
* multiple write accesses gives unpredictable result. Understood by "write"
|
||||
|
@ -208,7 +209,7 @@
|
|||
#define T_REMDEVALL (1<<2) /* Remove all devs */
|
||||
#define T_REMDEV (1<<3) /* Remove one dev */
|
||||
|
||||
/* If lock -- can be removed after some work */
|
||||
/* If lock -- protects updating of if_list */
|
||||
#define if_lock(t) spin_lock(&(t->if_lock));
|
||||
#define if_unlock(t) spin_unlock(&(t->if_lock));
|
||||
|
||||
|
@ -241,6 +242,7 @@ struct pktgen_dev {
|
|||
struct proc_dir_entry *entry; /* proc file */
|
||||
struct pktgen_thread *pg_thread;/* the owner */
|
||||
struct list_head list; /* chaining in the thread's run-queue */
|
||||
struct rcu_head rcu; /* freed by RCU */
|
||||
|
||||
int running; /* if false, the test will stop */
|
||||
|
||||
|
@ -1737,14 +1739,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
|
|||
|
||||
seq_puts(seq, "Running: ");
|
||||
|
||||
if_lock(t);
|
||||
list_for_each_entry(pkt_dev, &t->if_list, list)
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
|
||||
if (pkt_dev->running)
|
||||
seq_printf(seq, "%s ", pkt_dev->odevname);
|
||||
|
||||
seq_puts(seq, "\nStopped: ");
|
||||
|
||||
list_for_each_entry(pkt_dev, &t->if_list, list)
|
||||
list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
|
||||
if (!pkt_dev->running)
|
||||
seq_printf(seq, "%s ", pkt_dev->odevname);
|
||||
|
||||
|
@ -1753,7 +1755,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
|
|||
else
|
||||
seq_puts(seq, "\nResult: NA\n");
|
||||
|
||||
if_unlock(t);
|
||||
rcu_read_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1878,10 +1880,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const struct pktgen_net *pn,
|
|||
pkt_dev = pktgen_find_dev(t, ifname, exact);
|
||||
if (pkt_dev) {
|
||||
if (remove) {
|
||||
if_lock(t);
|
||||
pkt_dev->removal_mark = 1;
|
||||
t->control |= T_REMDEV;
|
||||
if_unlock(t);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1931,7 +1931,8 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
|
|||
list_for_each_entry(t, &pn->pktgen_threads, th_list) {
|
||||
struct pktgen_dev *pkt_dev;
|
||||
|
||||
list_for_each_entry(pkt_dev, &t->if_list, list) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
|
||||
if (pkt_dev->odev != dev)
|
||||
continue;
|
||||
|
||||
|
@ -1946,6 +1947,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
|
|||
dev->name);
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2997,8 +2999,8 @@ static void pktgen_run(struct pktgen_thread *t)
|
|||
|
||||
func_enter();
|
||||
|
||||
if_lock(t);
|
||||
list_for_each_entry(pkt_dev, &t->if_list, list) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
|
||||
|
||||
/*
|
||||
* setup odev and create initial packet.
|
||||
|
@ -3007,18 +3009,18 @@ static void pktgen_run(struct pktgen_thread *t)
|
|||
|
||||
if (pkt_dev->odev) {
|
||||
pktgen_clear_counters(pkt_dev);
|
||||
pkt_dev->running = 1; /* Cranke yeself! */
|
||||
pkt_dev->skb = NULL;
|
||||
pkt_dev->started_at = pkt_dev->next_tx = ktime_get();
|
||||
|
||||
set_pkt_overhead(pkt_dev);
|
||||
|
||||
strcpy(pkt_dev->result, "Starting");
|
||||
pkt_dev->running = 1; /* Cranke yeself! */
|
||||
started++;
|
||||
} else
|
||||
strcpy(pkt_dev->result, "Error starting");
|
||||
}
|
||||
if_unlock(t);
|
||||
rcu_read_unlock();
|
||||
if (started)
|
||||
t->control &= ~(T_STOP);
|
||||
}
|
||||
|
@ -3041,27 +3043,25 @@ static int thread_is_running(const struct pktgen_thread *t)
|
|||
{
|
||||
const struct pktgen_dev *pkt_dev;
|
||||
|
||||
list_for_each_entry(pkt_dev, &t->if_list, list)
|
||||
if (pkt_dev->running)
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(pkt_dev, &t->if_list, list)
|
||||
if (pkt_dev->running) {
|
||||
rcu_read_unlock();
|
||||
return 1;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pktgen_wait_thread_run(struct pktgen_thread *t)
|
||||
{
|
||||
if_lock(t);
|
||||
|
||||
while (thread_is_running(t)) {
|
||||
|
||||
if_unlock(t);
|
||||
|
||||
msleep_interruptible(100);
|
||||
|
||||
if (signal_pending(current))
|
||||
goto signal;
|
||||
if_lock(t);
|
||||
}
|
||||
if_unlock(t);
|
||||
return 1;
|
||||
signal:
|
||||
return 0;
|
||||
|
@ -3166,10 +3166,10 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
pkt_dev->running = 0;
|
||||
kfree_skb(pkt_dev->skb);
|
||||
pkt_dev->skb = NULL;
|
||||
pkt_dev->stopped_at = ktime_get();
|
||||
pkt_dev->running = 0;
|
||||
|
||||
show_results(pkt_dev, nr_frags);
|
||||
|
||||
|
@ -3180,9 +3180,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
|
|||
{
|
||||
struct pktgen_dev *pkt_dev, *best = NULL;
|
||||
|
||||
if_lock(t);
|
||||
|
||||
list_for_each_entry(pkt_dev, &t->if_list, list) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
|
||||
if (!pkt_dev->running)
|
||||
continue;
|
||||
if (best == NULL)
|
||||
|
@ -3190,7 +3189,8 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
|
|||
else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0)
|
||||
best = pkt_dev;
|
||||
}
|
||||
if_unlock(t);
|
||||
rcu_read_unlock();
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
|
@ -3200,13 +3200,13 @@ static void pktgen_stop(struct pktgen_thread *t)
|
|||
|
||||
func_enter();
|
||||
|
||||
if_lock(t);
|
||||
rcu_read_lock();
|
||||
|
||||
list_for_each_entry(pkt_dev, &t->if_list, list) {
|
||||
list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
|
||||
pktgen_stop_device(pkt_dev);
|
||||
}
|
||||
|
||||
if_unlock(t);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3220,8 +3220,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
|
|||
|
||||
func_enter();
|
||||
|
||||
if_lock(t);
|
||||
|
||||
list_for_each_safe(q, n, &t->if_list) {
|
||||
cur = list_entry(q, struct pktgen_dev, list);
|
||||
|
||||
|
@ -3235,8 +3233,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
|
|||
|
||||
break;
|
||||
}
|
||||
|
||||
if_unlock(t);
|
||||
}
|
||||
|
||||
static void pktgen_rem_all_ifs(struct pktgen_thread *t)
|
||||
|
@ -3248,8 +3244,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
|
|||
|
||||
/* Remove all devices, free mem */
|
||||
|
||||
if_lock(t);
|
||||
|
||||
list_for_each_safe(q, n, &t->if_list) {
|
||||
cur = list_entry(q, struct pktgen_dev, list);
|
||||
|
||||
|
@ -3258,8 +3252,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
|
|||
|
||||
pktgen_remove_device(t, cur);
|
||||
}
|
||||
|
||||
if_unlock(t);
|
||||
}
|
||||
|
||||
static void pktgen_rem_thread(struct pktgen_thread *t)
|
||||
|
@ -3407,10 +3399,10 @@ static int pktgen_thread_worker(void *arg)
|
|||
|
||||
pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
set_freezable();
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
pkt_dev = next_to_run(t);
|
||||
|
||||
|
@ -3424,8 +3416,6 @@ static int pktgen_thread_worker(void *arg)
|
|||
continue;
|
||||
}
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
if (likely(pkt_dev)) {
|
||||
pktgen_xmit(pkt_dev);
|
||||
|
||||
|
@ -3456,9 +3446,8 @@ static int pktgen_thread_worker(void *arg)
|
|||
}
|
||||
|
||||
try_to_freeze();
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
}
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
pr_debug("%s stopping all device\n", t->tsk->comm);
|
||||
pktgen_stop(t);
|
||||
|
@ -3485,8 +3474,8 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
|
|||
struct pktgen_dev *p, *pkt_dev = NULL;
|
||||
size_t len = strlen(ifname);
|
||||
|
||||
if_lock(t);
|
||||
list_for_each_entry(p, &t->if_list, list)
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(p, &t->if_list, list)
|
||||
if (strncmp(p->odevname, ifname, len) == 0) {
|
||||
if (p->odevname[len]) {
|
||||
if (exact || p->odevname[len] != '@')
|
||||
|
@ -3496,7 +3485,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
|
|||
break;
|
||||
}
|
||||
|
||||
if_unlock(t);
|
||||
rcu_read_unlock();
|
||||
pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
|
||||
return pkt_dev;
|
||||
}
|
||||
|
@ -3510,6 +3499,12 @@ static int add_dev_to_thread(struct pktgen_thread *t,
|
|||
{
|
||||
int rv = 0;
|
||||
|
||||
/* This function cannot be called concurrently, as its called
|
||||
* under pktgen_thread_lock mutex, but it can run from
|
||||
* userspace on another CPU than the kthread. The if_lock()
|
||||
* is used here to sync with concurrent instances of
|
||||
* _rem_dev_from_if_list() invoked via kthread, which is also
|
||||
* updating the if_list */
|
||||
if_lock(t);
|
||||
|
||||
if (pkt_dev->pg_thread) {
|
||||
|
@ -3518,9 +3513,9 @@ static int add_dev_to_thread(struct pktgen_thread *t,
|
|||
goto out;
|
||||
}
|
||||
|
||||
list_add(&pkt_dev->list, &t->if_list);
|
||||
pkt_dev->pg_thread = t;
|
||||
pkt_dev->running = 0;
|
||||
pkt_dev->pg_thread = t;
|
||||
list_add_rcu(&pkt_dev->list, &t->if_list);
|
||||
|
||||
out:
|
||||
if_unlock(t);
|
||||
|
@ -3675,11 +3670,13 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
|
|||
struct list_head *q, *n;
|
||||
struct pktgen_dev *p;
|
||||
|
||||
if_lock(t);
|
||||
list_for_each_safe(q, n, &t->if_list) {
|
||||
p = list_entry(q, struct pktgen_dev, list);
|
||||
if (p == pkt_dev)
|
||||
list_del(&p->list);
|
||||
list_del_rcu(&p->list);
|
||||
}
|
||||
if_unlock(t);
|
||||
}
|
||||
|
||||
static int pktgen_remove_device(struct pktgen_thread *t,
|
||||
|
@ -3699,20 +3696,22 @@ static int pktgen_remove_device(struct pktgen_thread *t,
|
|||
pkt_dev->odev = NULL;
|
||||
}
|
||||
|
||||
/* And update the thread if_list */
|
||||
|
||||
_rem_dev_from_if_list(t, pkt_dev);
|
||||
|
||||
/* Remove proc before if_list entry, because add_device uses
|
||||
* list to determine if interface already exist, avoid race
|
||||
* with proc_create_data() */
|
||||
if (pkt_dev->entry)
|
||||
proc_remove(pkt_dev->entry);
|
||||
|
||||
/* And update the thread if_list */
|
||||
_rem_dev_from_if_list(t, pkt_dev);
|
||||
|
||||
#ifdef CONFIG_XFRM
|
||||
free_SAs(pkt_dev);
|
||||
#endif
|
||||
vfree(pkt_dev->flows);
|
||||
if (pkt_dev->page)
|
||||
put_page(pkt_dev->page);
|
||||
kfree(pkt_dev);
|
||||
kfree_rcu(pkt_dev, rcu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3812,6 +3811,7 @@ static void __exit pg_cleanup(void)
|
|||
{
|
||||
unregister_netdevice_notifier(&pktgen_notifier_block);
|
||||
unregister_pernet_subsys(&pg_net_ops);
|
||||
/* Don't need rcu_barrier() due to use of kfree_rcu() */
|
||||
}
|
||||
|
||||
module_init(pg_init);
|
||||
|
|
Loading…
Reference in New Issue