mirror of https://gitee.com/openkylin/linux.git
Merge branch 'tcp_metrics_netns_debloat'
Eric W. Biederman says: ==================== tcp_metrics: Network namespace bloat reduction v3 This is a small pile of patches that convert tcp_metrics from using a hash table per network namespace to using a single hash table for all network namespaces. This is broken up into several patches so that each small step along the way could be carefully scrutinized as I wrote it, and equally so that each small step can be reviewed. There are several cleanups included in this series: the addition of panic calls during boot where we cannot handle failure (not trying to recover simplifies the code), and the removal of the return code from tcp_metrics_flush_all. The motivation for this change is that the tcp_metrics hash table at 128KiB is one of the largest components of a freshly allocated network namespace. I am resending this because the previous version I sent has suffered bitrot, so I have respun the patches so that they apply. I believe I have addressed all of the review concerns except optimal behavior on little machines with 32-byte cache lines, which is beyond me as even the current code has bad behavior in that case. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
5e1459ca13
|
@ -54,8 +54,6 @@ struct netns_ipv4 {
|
|||
struct sock *mc_autojoin_sk;
|
||||
|
||||
struct inet_peer_base *peers;
|
||||
struct tcpm_hash_bucket *tcp_metrics_hash;
|
||||
unsigned int tcp_metrics_hash_log;
|
||||
struct sock * __percpu *tcp_sk;
|
||||
struct netns_frags frags;
|
||||
#ifdef CONFIG_NETFILTER
|
||||
|
|
|
@ -40,6 +40,7 @@ struct tcp_fastopen_metrics {
|
|||
|
||||
struct tcp_metrics_block {
|
||||
struct tcp_metrics_block __rcu *tcpm_next;
|
||||
possible_net_t tcpm_net;
|
||||
struct inetpeer_addr tcpm_saddr;
|
||||
struct inetpeer_addr tcpm_daddr;
|
||||
unsigned long tcpm_stamp;
|
||||
|
@ -52,6 +53,11 @@ struct tcp_metrics_block {
|
|||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
/* Return the network namespace recorded in @tm's tcpm_net field, as read by read_pnet(). */
static inline struct net *tm_net(struct tcp_metrics_block *tm)
|
||||
{
|
||||
return read_pnet(&tm->tcpm_net);
|
||||
}
|
||||
|
||||
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
|
||||
enum tcp_metric_index idx)
|
||||
{
|
||||
|
@ -91,6 +97,9 @@ struct tcpm_hash_bucket {
|
|||
struct tcp_metrics_block __rcu *chain;
|
||||
};
|
||||
|
||||
static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
|
||||
static unsigned int tcp_metrics_hash_log __read_mostly;
|
||||
|
||||
static DEFINE_SPINLOCK(tcp_metrics_lock);
|
||||
|
||||
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
|
||||
|
@ -171,7 +180,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
|
|||
if (unlikely(reclaim)) {
|
||||
struct tcp_metrics_block *oldest;
|
||||
|
||||
oldest = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain);
|
||||
oldest = rcu_dereference(tcp_metrics_hash[hash].chain);
|
||||
for (tm = rcu_dereference(oldest->tcpm_next); tm;
|
||||
tm = rcu_dereference(tm->tcpm_next)) {
|
||||
if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
|
||||
|
@ -183,14 +192,15 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
|
|||
if (!tm)
|
||||
goto out_unlock;
|
||||
}
|
||||
write_pnet(&tm->tcpm_net, net);
|
||||
tm->tcpm_saddr = *saddr;
|
||||
tm->tcpm_daddr = *daddr;
|
||||
|
||||
tcpm_suck_dst(tm, dst, true);
|
||||
|
||||
if (likely(!reclaim)) {
|
||||
tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
|
||||
rcu_assign_pointer(net->ipv4.tcp_metrics_hash[hash].chain, tm);
|
||||
tm->tcpm_next = tcp_metrics_hash[hash].chain;
|
||||
rcu_assign_pointer(tcp_metrics_hash[hash].chain, tm);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
|
@ -214,10 +224,11 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s
|
|||
struct tcp_metrics_block *tm;
|
||||
int depth = 0;
|
||||
|
||||
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
|
||||
for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
|
||||
tm = rcu_dereference(tm->tcpm_next)) {
|
||||
if (addr_same(&tm->tcpm_saddr, saddr) &&
|
||||
addr_same(&tm->tcpm_daddr, daddr))
|
||||
addr_same(&tm->tcpm_daddr, daddr) &&
|
||||
net_eq(tm_net(tm), net))
|
||||
break;
|
||||
depth++;
|
||||
}
|
||||
|
@ -252,12 +263,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
|
|||
}
|
||||
|
||||
net = dev_net(dst->dev);
|
||||
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
|
||||
hash ^= net_hash_mix(net);
|
||||
hash = hash_32(hash, tcp_metrics_hash_log);
|
||||
|
||||
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
|
||||
for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
|
||||
tm = rcu_dereference(tm->tcpm_next)) {
|
||||
if (addr_same(&tm->tcpm_saddr, &saddr) &&
|
||||
addr_same(&tm->tcpm_daddr, &daddr))
|
||||
addr_same(&tm->tcpm_daddr, &daddr) &&
|
||||
net_eq(tm_net(tm), net))
|
||||
break;
|
||||
}
|
||||
tcpm_check_stamp(tm, dst);
|
||||
|
@ -299,12 +312,14 @@ static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock
|
|||
return NULL;
|
||||
|
||||
net = twsk_net(tw);
|
||||
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
|
||||
hash ^= net_hash_mix(net);
|
||||
hash = hash_32(hash, tcp_metrics_hash_log);
|
||||
|
||||
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
|
||||
for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
|
||||
tm = rcu_dereference(tm->tcpm_next)) {
|
||||
if (addr_same(&tm->tcpm_saddr, &saddr) &&
|
||||
addr_same(&tm->tcpm_daddr, &daddr))
|
||||
addr_same(&tm->tcpm_daddr, &daddr) &&
|
||||
net_eq(tm_net(tm), net))
|
||||
break;
|
||||
}
|
||||
return tm;
|
||||
|
@ -347,7 +362,8 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
|
|||
return NULL;
|
||||
|
||||
net = dev_net(dst->dev);
|
||||
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
|
||||
hash ^= net_hash_mix(net);
|
||||
hash = hash_32(hash, tcp_metrics_hash_log);
|
||||
|
||||
tm = __tcp_get_metrics(&saddr, &daddr, net, hash);
|
||||
if (tm == TCP_METRICS_RECLAIM_PTR)
|
||||
|
@ -898,17 +914,19 @@ static int tcp_metrics_nl_dump(struct sk_buff *skb,
|
|||
struct netlink_callback *cb)
|
||||
{
|
||||
struct net *net = sock_net(skb->sk);
|
||||
unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
|
||||
unsigned int max_rows = 1U << tcp_metrics_hash_log;
|
||||
unsigned int row, s_row = cb->args[0];
|
||||
int s_col = cb->args[1], col = s_col;
|
||||
|
||||
for (row = s_row; row < max_rows; row++, s_col = 0) {
|
||||
struct tcp_metrics_block *tm;
|
||||
struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row;
|
||||
struct tcpm_hash_bucket *hb = tcp_metrics_hash + row;
|
||||
|
||||
rcu_read_lock();
|
||||
for (col = 0, tm = rcu_dereference(hb->chain); tm;
|
||||
tm = rcu_dereference(tm->tcpm_next), col++) {
|
||||
if (!net_eq(tm_net(tm), net))
|
||||
continue;
|
||||
if (col < s_col)
|
||||
continue;
|
||||
if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
|
||||
|
@ -994,13 +1012,15 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
|
|||
if (!reply)
|
||||
goto nla_put_failure;
|
||||
|
||||
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
|
||||
hash ^= net_hash_mix(net);
|
||||
hash = hash_32(hash, tcp_metrics_hash_log);
|
||||
ret = -ESRCH;
|
||||
rcu_read_lock();
|
||||
for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
|
||||
for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm;
|
||||
tm = rcu_dereference(tm->tcpm_next)) {
|
||||
if (addr_same(&tm->tcpm_daddr, &daddr) &&
|
||||
(!src || addr_same(&tm->tcpm_saddr, &saddr))) {
|
||||
(!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
|
||||
net_eq(tm_net(tm), net)) {
|
||||
ret = tcp_metrics_fill_info(msg, tm);
|
||||
break;
|
||||
}
|
||||
|
@ -1026,28 +1046,28 @@ static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
|
|||
|
||||
#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held())
|
||||
|
||||
static int tcp_metrics_flush_all(struct net *net)
|
||||
static void tcp_metrics_flush_all(struct net *net)
|
||||
{
|
||||
unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
|
||||
struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash;
|
||||
unsigned int max_rows = 1U << tcp_metrics_hash_log;
|
||||
struct tcpm_hash_bucket *hb = tcp_metrics_hash;
|
||||
struct tcp_metrics_block *tm;
|
||||
unsigned int row;
|
||||
|
||||
for (row = 0; row < max_rows; row++, hb++) {
|
||||
struct tcp_metrics_block __rcu **pp;
|
||||
spin_lock_bh(&tcp_metrics_lock);
|
||||
tm = deref_locked_genl(hb->chain);
|
||||
if (tm)
|
||||
hb->chain = NULL;
|
||||
spin_unlock_bh(&tcp_metrics_lock);
|
||||
while (tm) {
|
||||
struct tcp_metrics_block *next;
|
||||
|
||||
next = deref_genl(tm->tcpm_next);
|
||||
kfree_rcu(tm, rcu_head);
|
||||
tm = next;
|
||||
pp = &hb->chain;
|
||||
for (tm = deref_locked_genl(*pp); tm;
|
||||
tm = deref_locked_genl(*pp)) {
|
||||
if (net_eq(tm_net(tm), net)) {
|
||||
*pp = tm->tcpm_next;
|
||||
kfree_rcu(tm, rcu_head);
|
||||
} else {
|
||||
pp = &tm->tcpm_next;
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&tcp_metrics_lock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
|
||||
|
@ -1064,19 +1084,23 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
|
|||
ret = parse_nl_addr(info, &daddr, &hash, 1);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret > 0)
|
||||
return tcp_metrics_flush_all(net);
|
||||
if (ret > 0) {
|
||||
tcp_metrics_flush_all(net);
|
||||
return 0;
|
||||
}
|
||||
ret = parse_nl_saddr(info, &saddr);
|
||||
if (ret < 0)
|
||||
src = false;
|
||||
|
||||
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
|
||||
hb = net->ipv4.tcp_metrics_hash + hash;
|
||||
hash ^= net_hash_mix(net);
|
||||
hash = hash_32(hash, tcp_metrics_hash_log);
|
||||
hb = tcp_metrics_hash + hash;
|
||||
pp = &hb->chain;
|
||||
spin_lock_bh(&tcp_metrics_lock);
|
||||
for (tm = deref_locked_genl(*pp); tm; tm = deref_locked_genl(*pp)) {
|
||||
if (addr_same(&tm->tcpm_daddr, &daddr) &&
|
||||
(!src || addr_same(&tm->tcpm_saddr, &saddr))) {
|
||||
(!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
|
||||
net_eq(tm_net(tm), net)) {
|
||||
*pp = tm->tcpm_next;
|
||||
kfree_rcu(tm, rcu_head);
|
||||
found = true;
|
||||
|
@ -1126,6 +1150,9 @@ static int __net_init tcp_net_metrics_init(struct net *net)
|
|||
size_t size;
|
||||
unsigned int slots;
|
||||
|
||||
if (!net_eq(net, &init_net))
|
||||
return 0;
|
||||
|
||||
slots = tcpmhash_entries;
|
||||
if (!slots) {
|
||||
if (totalram_pages >= 128 * 1024)
|
||||
|
@ -1134,14 +1161,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
|
|||
slots = 8 * 1024;
|
||||
}
|
||||
|
||||
net->ipv4.tcp_metrics_hash_log = order_base_2(slots);
|
||||
size = sizeof(struct tcpm_hash_bucket) << net->ipv4.tcp_metrics_hash_log;
|
||||
tcp_metrics_hash_log = order_base_2(slots);
|
||||
size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log;
|
||||
|
||||
net->ipv4.tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!net->ipv4.tcp_metrics_hash)
|
||||
net->ipv4.tcp_metrics_hash = vzalloc(size);
|
||||
tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!tcp_metrics_hash)
|
||||
tcp_metrics_hash = vzalloc(size);
|
||||
|
||||
if (!net->ipv4.tcp_metrics_hash)
|
||||
if (!tcp_metrics_hash)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
|
@ -1149,19 +1176,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
|
|||
|
||||
static void __net_exit tcp_net_metrics_exit(struct net *net)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
|
||||
struct tcp_metrics_block *tm, *next;
|
||||
|
||||
tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
|
||||
while (tm) {
|
||||
next = rcu_dereference_protected(tm->tcpm_next, 1);
|
||||
kfree(tm);
|
||||
tm = next;
|
||||
}
|
||||
}
|
||||
kvfree(net->ipv4.tcp_metrics_hash);
|
||||
tcp_metrics_flush_all(net);
|
||||
}
|
||||
|
||||
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
|
||||
|
@ -1175,16 +1190,10 @@ void __init tcp_metrics_init(void)
|
|||
|
||||
ret = register_pernet_subsys(&tcp_net_metrics_ops);
|
||||
if (ret < 0)
|
||||
goto cleanup;
|
||||
panic("Could not allocate the tcp_metrics hash table\n");
|
||||
|
||||
ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
|
||||
tcp_metrics_nl_ops);
|
||||
if (ret < 0)
|
||||
goto cleanup_subsys;
|
||||
return;
|
||||
|
||||
cleanup_subsys:
|
||||
unregister_pernet_subsys(&tcp_net_metrics_ops);
|
||||
|
||||
cleanup:
|
||||
return;
|
||||
panic("Could not register tcp_metrics generic netlink\n");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue