mirror of https://gitee.com/openkylin/linux.git
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Add vlan match and pop actions to the flowtable offload, patches from wenxu.

2) Reduce size of the netns_ct structure, which itself is embedded in struct net. Make netns_ct a read-mostly structure. Patches from Florian Westphal.

3) Add FLOW_OFFLOAD_XMIT_UNSPEC to skip dst check from garbage collector path, as required by the tc CT action. From Roi Dayan.

4) VLAN offload fixes for nftables: Allow for matching on both s-vlan and c-vlan selectors. Fix match of VLAN id due to incorrect byteorder. Add a new routine to properly populate flow dissector ethertypes.

5) Missing keys in ip{6}_route_me_harder() results in incorrect routes. This includes an update for selftest infra. Patches from Ido Schimmel.

6) Add counter hardware offload support through FLOW_CLS_STATS.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
6dd06ec7c1
|
@ -44,6 +44,13 @@ union nf_conntrack_expect_proto {
|
|||
};
|
||||
|
||||
struct nf_conntrack_net {
|
||||
/* only used when new connection is allocated: */
|
||||
atomic_t count;
|
||||
unsigned int expect_count;
|
||||
u8 sysctl_auto_assign_helper;
|
||||
bool auto_assign_helper_warned;
|
||||
|
||||
/* only used from work queues, configuration plane, and so on: */
|
||||
unsigned int users4;
|
||||
unsigned int users6;
|
||||
unsigned int users_bridge;
|
||||
|
@ -331,6 +338,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
|
|||
void nf_ct_tmpl_free(struct nf_conn *tmpl);
|
||||
|
||||
u32 nf_ct_get_id(const struct nf_conn *ct);
|
||||
u32 nf_conntrack_count(const struct net *net);
|
||||
|
||||
static inline void
|
||||
nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
|
||||
|
|
|
@ -21,6 +21,8 @@ struct nf_flow_key {
|
|||
struct flow_dissector_key_control control;
|
||||
struct flow_dissector_key_control enc_control;
|
||||
struct flow_dissector_key_basic basic;
|
||||
struct flow_dissector_key_vlan vlan;
|
||||
struct flow_dissector_key_vlan cvlan;
|
||||
union {
|
||||
struct flow_dissector_key_ipv4_addrs ipv4;
|
||||
struct flow_dissector_key_ipv6_addrs ipv6;
|
||||
|
@ -90,7 +92,8 @@ enum flow_offload_tuple_dir {
|
|||
#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
|
||||
|
||||
/*
 * Transmit path selector stored in a flow offload tuple's xmit_type.
 *
 * FLOW_OFFLOAD_XMIT_UNSPEC is the zero value, so a tuple whose xmit_type
 * was never filled in reads as "unspecified". Per the merge description it
 * exists so that the garbage collector path can skip the dst check, as
 * required by the tc CT action.
 *
 * The remaining values follow sequentially (NEIGH = 1, XFRM = 2,
 * DIRECT = 3). The pre-merge "FLOW_OFFLOAD_XMIT_NEIGH = 0" line must not
 * be kept: it would redeclare the enumerator.
 */
enum flow_offload_xmit_type {
	FLOW_OFFLOAD_XMIT_UNSPEC	= 0,
	FLOW_OFFLOAD_XMIT_NEIGH,
	FLOW_OFFLOAD_XMIT_XFRM,
	FLOW_OFFLOAD_XMIT_DIRECT,
};
|
||||
|
|
|
@ -867,6 +867,8 @@ struct nft_expr_ops {
|
|||
int (*offload)(struct nft_offload_ctx *ctx,
|
||||
struct nft_flow_rule *flow,
|
||||
const struct nft_expr *expr);
|
||||
void (*offload_stats)(struct nft_expr *expr,
|
||||
const struct flow_stats *stats);
|
||||
u32 offload_flags;
|
||||
const struct nft_expr_type *type;
|
||||
void *data;
|
||||
|
|
|
@ -4,11 +4,16 @@
|
|||
#include <net/flow_offload.h>
|
||||
#include <net/netfilter/nf_tables.h>
|
||||
|
||||
/*
 * Bit flags stored in struct nft_offload_reg::flags.
 *
 * NOTE(review): NFT_OFFLOAD_F_NETWORK2HOST presumably marks a register
 * whose value has to be converted from network to host byte order before
 * it is handed to the driver (the merge description mentions a VLAN id
 * byteorder fix) - confirm against the users of this flag.
 */
enum nft_offload_reg_flags {
	NFT_OFFLOAD_F_NETWORK2HOST	= (1 << 0),
};
|
||||
|
||||
struct nft_offload_reg {
|
||||
u32 key;
|
||||
u32 len;
|
||||
u32 base_offset;
|
||||
u32 offset;
|
||||
u32 flags;
|
||||
struct nft_data data;
|
||||
struct nft_data mask;
|
||||
};
|
||||
|
@ -45,6 +50,7 @@ struct nft_flow_key {
|
|||
struct flow_dissector_key_ports tp;
|
||||
struct flow_dissector_key_ip ip;
|
||||
struct flow_dissector_key_vlan vlan;
|
||||
struct flow_dissector_key_vlan cvlan;
|
||||
struct flow_dissector_key_eth_addrs eth_addrs;
|
||||
struct flow_dissector_key_meta meta;
|
||||
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
|
||||
|
@ -68,16 +74,21 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
|
|||
|
||||
struct nft_rule;
|
||||
struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule);
|
||||
int nft_flow_rule_stats(const struct nft_chain *chain, const struct nft_rule *rule);
|
||||
void nft_flow_rule_destroy(struct nft_flow_rule *flow);
|
||||
int nft_flow_rule_offload_commit(struct net *net);
|
||||
|
||||
/*
 * NFT_OFFLOAD_MATCH_FLAGS - describe where a register's match lives inside
 * struct nft_flow_key.
 *
 * @__key:   dissector key id stored in (__reg)->key
 * @__base:  member of struct nft_flow_key that holds the field
 * @__field: member inside @__base that is matched
 * @__len:   length of the matched field, stored in (__reg)->len
 * @__reg:   pointer to the struct nft_offload_reg to fill in
 * @__flags: enum nft_offload_reg_flags bits stored in (__reg)->flags
 *
 * The expansion is a plain statement sequence ending in ';' (not a
 * do { } while (0) block) because NFT_OFFLOAD_MATCH_EXACT appends further
 * statements directly after NFT_OFFLOAD_MATCH(). Do not use it as the
 * unbraced body of an if/else.
 */
#define NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, __flags)	\
	(__reg)->base_offset	=					\
		offsetof(struct nft_flow_key, __base);			\
	(__reg)->offset		=					\
		offsetof(struct nft_flow_key, __base.__field);		\
	(__reg)->len		= (__len);				\
	(__reg)->key		= (__key);				\
	(__reg)->flags		= (__flags);

/* Same as NFT_OFFLOAD_MATCH_FLAGS() with no flags set. */
#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
	NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, 0)
|
||||
|
||||
#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg) \
|
||||
NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
|
||||
|
|
|
@ -24,9 +24,9 @@ struct nf_generic_net {
|
|||
|
||||
struct nf_tcp_net {
|
||||
unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
|
||||
int tcp_loose;
|
||||
int tcp_be_liberal;
|
||||
int tcp_max_retrans;
|
||||
u8 tcp_loose;
|
||||
u8 tcp_be_liberal;
|
||||
u8 tcp_max_retrans;
|
||||
};
|
||||
|
||||
enum udp_conntrack {
|
||||
|
@ -45,7 +45,7 @@ struct nf_icmp_net {
|
|||
|
||||
#ifdef CONFIG_NF_CT_PROTO_DCCP
|
||||
struct nf_dccp_net {
|
||||
int dccp_loose;
|
||||
u8 dccp_loose;
|
||||
unsigned int dccp_timeout[CT_DCCP_MAX + 1];
|
||||
};
|
||||
#endif
|
||||
|
@ -93,18 +93,15 @@ struct ct_pcpu {
|
|||
};
|
||||
|
||||
struct netns_ct {
|
||||
atomic_t count;
|
||||
unsigned int expect_count;
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
bool ecache_dwork_pending;
|
||||
#endif
|
||||
bool auto_assign_helper_warned;
|
||||
unsigned int sysctl_log_invalid; /* Log invalid packets */
|
||||
int sysctl_events;
|
||||
int sysctl_acct;
|
||||
int sysctl_auto_assign_helper;
|
||||
int sysctl_tstamp;
|
||||
int sysctl_checksum;
|
||||
u8 sysctl_log_invalid; /* Log invalid packets */
|
||||
u8 sysctl_events;
|
||||
u8 sysctl_acct;
|
||||
u8 sysctl_auto_assign_helper;
|
||||
u8 sysctl_tstamp;
|
||||
u8 sysctl_checksum;
|
||||
|
||||
struct ct_pcpu __percpu *pcpu_lists;
|
||||
struct ip_conntrack_stat __percpu *stat;
|
||||
|
|
|
@ -25,6 +25,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
|
|||
__be32 saddr = iph->saddr;
|
||||
__u8 flags;
|
||||
struct net_device *dev = skb_dst(skb)->dev;
|
||||
struct flow_keys flkeys;
|
||||
unsigned int hh_len;
|
||||
|
||||
sk = sk_to_full_sk(sk);
|
||||
|
@ -48,6 +49,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
|
|||
fl4.flowi4_oif = l3mdev_master_ifindex(dev);
|
||||
fl4.flowi4_mark = skb->mark;
|
||||
fl4.flowi4_flags = flags;
|
||||
fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys);
|
||||
rt = ip_route_output_key(net, &fl4);
|
||||
if (IS_ERR(rt))
|
||||
return PTR_ERR(rt);
|
||||
|
|
|
@ -24,6 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
|
|||
{
|
||||
const struct ipv6hdr *iph = ipv6_hdr(skb);
|
||||
struct sock *sk = sk_to_full_sk(sk_partial);
|
||||
struct flow_keys flkeys;
|
||||
unsigned int hh_len;
|
||||
struct dst_entry *dst;
|
||||
int strict = (ipv6_addr_type(&iph->daddr) &
|
||||
|
@ -38,6 +39,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
|
|||
};
|
||||
int err;
|
||||
|
||||
fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
|
||||
dst = ip6_route_output(net, sk, &fl6);
|
||||
err = dst->error;
|
||||
if (err) {
|
||||
|
|
|
@ -55,6 +55,8 @@
|
|||
|
||||
#include "nf_internals.h"
|
||||
|
||||
extern unsigned int nf_conntrack_net_id;
|
||||
|
||||
__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
|
||||
EXPORT_SYMBOL_GPL(nf_conntrack_locks);
|
||||
|
||||
|
@ -85,6 +87,8 @@ static __read_mostly bool nf_conntrack_locks_all;
|
|||
|
||||
static struct conntrack_gc_work conntrack_gc_work;
|
||||
|
||||
extern unsigned int nf_conntrack_net_id;
|
||||
|
||||
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
|
||||
{
|
||||
/* 1) Acquire the lock */
|
||||
|
@ -1379,6 +1383,7 @@ static void gc_worker(struct work_struct *work)
|
|||
i = 0;
|
||||
|
||||
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
|
||||
struct nf_conntrack_net *cnet;
|
||||
struct net *net;
|
||||
|
||||
tmp = nf_ct_tuplehash_to_ctrack(h);
|
||||
|
@ -1399,7 +1404,8 @@ static void gc_worker(struct work_struct *work)
|
|||
continue;
|
||||
|
||||
net = nf_ct_net(tmp);
|
||||
if (atomic_read(&net->ct.count) < nf_conntrack_max95)
|
||||
cnet = net_generic(net, nf_conntrack_net_id);
|
||||
if (atomic_read(&cnet->count) < nf_conntrack_max95)
|
||||
continue;
|
||||
|
||||
/* need to take reference to avoid possible races */
|
||||
|
@ -1478,17 +1484,18 @@ __nf_conntrack_alloc(struct net *net,
|
|||
const struct nf_conntrack_tuple *repl,
|
||||
gfp_t gfp, u32 hash)
|
||||
{
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
unsigned int ct_count;
|
||||
struct nf_conn *ct;
|
||||
|
||||
/* We don't want any race condition at early drop stage */
|
||||
atomic_inc(&net->ct.count);
|
||||
ct_count = atomic_inc_return(&cnet->count);
|
||||
|
||||
if (nf_conntrack_max &&
|
||||
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
|
||||
if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) {
|
||||
if (!early_drop(net, hash)) {
|
||||
if (!conntrack_gc_work.early_drop)
|
||||
conntrack_gc_work.early_drop = true;
|
||||
atomic_dec(&net->ct.count);
|
||||
atomic_dec(&cnet->count);
|
||||
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
@ -1523,7 +1530,7 @@ __nf_conntrack_alloc(struct net *net,
|
|||
atomic_set(&ct->ct_general.use, 0);
|
||||
return ct;
|
||||
out:
|
||||
atomic_dec(&net->ct.count);
|
||||
atomic_dec(&cnet->count);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
|
@ -1540,6 +1547,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
|
|||
void nf_conntrack_free(struct nf_conn *ct)
|
||||
{
|
||||
struct net *net = nf_ct_net(ct);
|
||||
struct nf_conntrack_net *cnet;
|
||||
|
||||
/* A freed object has refcnt == 0, that's
|
||||
* the golden rule for SLAB_TYPESAFE_BY_RCU
|
||||
|
@ -1548,8 +1556,10 @@ void nf_conntrack_free(struct nf_conn *ct)
|
|||
|
||||
nf_ct_ext_destroy(ct);
|
||||
kmem_cache_free(nf_conntrack_cachep, ct);
|
||||
cnet = net_generic(net, nf_conntrack_net_id);
|
||||
|
||||
smp_mb__before_atomic();
|
||||
atomic_dec(&net->ct.count);
|
||||
atomic_dec(&cnet->count);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_conntrack_free);
|
||||
|
||||
|
@ -1570,6 +1580,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
|
|||
const struct nf_conntrack_zone *zone;
|
||||
struct nf_conn_timeout *timeout_ext;
|
||||
struct nf_conntrack_zone tmp;
|
||||
struct nf_conntrack_net *cnet;
|
||||
|
||||
if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
|
||||
pr_debug("Can't invert tuple.\n");
|
||||
|
@ -1603,7 +1614,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
|
|||
GFP_ATOMIC);
|
||||
|
||||
local_bh_disable();
|
||||
if (net->ct.expect_count) {
|
||||
cnet = net_generic(net, nf_conntrack_net_id);
|
||||
if (cnet->expect_count) {
|
||||
spin_lock(&nf_conntrack_expect_lock);
|
||||
exp = nf_ct_find_expectation(net, zone, tuple);
|
||||
if (exp) {
|
||||
|
@ -2305,9 +2317,11 @@ __nf_ct_unconfirmed_destroy(struct net *net)
|
|||
|
||||
void nf_ct_unconfirmed_destroy(struct net *net)
|
||||
{
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (atomic_read(&net->ct.count) > 0) {
|
||||
if (atomic_read(&cnet->count) > 0) {
|
||||
__nf_ct_unconfirmed_destroy(net);
|
||||
nf_queue_nf_hook_drop(net);
|
||||
synchronize_net();
|
||||
|
@ -2319,11 +2333,12 @@ void nf_ct_iterate_cleanup_net(struct net *net,
|
|||
int (*iter)(struct nf_conn *i, void *data),
|
||||
void *data, u32 portid, int report)
|
||||
{
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
struct iter_data d;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (atomic_read(&net->ct.count) == 0)
|
||||
if (atomic_read(&cnet->count) == 0)
|
||||
return;
|
||||
|
||||
d.iter = iter;
|
||||
|
@ -2352,7 +2367,9 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
|
|||
|
||||
down_read(&net_rwsem);
|
||||
for_each_net(net) {
|
||||
if (atomic_read(&net->ct.count) == 0)
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
|
||||
if (atomic_read(&cnet->count) == 0)
|
||||
continue;
|
||||
__nf_ct_unconfirmed_destroy(net);
|
||||
nf_queue_nf_hook_drop(net);
|
||||
|
@ -2432,8 +2449,10 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
|
|||
i_see_dead_people:
|
||||
busy = 0;
|
||||
list_for_each_entry(net, net_exit_list, exit_list) {
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
|
||||
nf_ct_iterate_cleanup(kill_all, net, 0, 0);
|
||||
if (atomic_read(&net->ct.count) != 0)
|
||||
if (atomic_read(&cnet->count) != 0)
|
||||
busy = 1;
|
||||
}
|
||||
if (busy) {
|
||||
|
@ -2714,12 +2733,13 @@ void nf_conntrack_init_end(void)
|
|||
|
||||
int nf_conntrack_init_net(struct net *net)
|
||||
{
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
int ret = -ENOMEM;
|
||||
int cpu;
|
||||
|
||||
BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
|
||||
BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
|
||||
atomic_set(&net->ct.count, 0);
|
||||
atomic_set(&cnet->count, 0);
|
||||
|
||||
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
|
||||
if (!net->ct.pcpu_lists)
|
||||
|
|
|
@ -43,18 +43,23 @@ unsigned int nf_ct_expect_max __read_mostly;
|
|||
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
|
||||
static unsigned int nf_ct_expect_hashrnd __read_mostly;
|
||||
|
||||
extern unsigned int nf_conntrack_net_id;
|
||||
|
||||
/* nf_conntrack_expect helper functions */
|
||||
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
|
||||
u32 portid, int report)
|
||||
{
|
||||
struct nf_conn_help *master_help = nfct_help(exp->master);
|
||||
struct net *net = nf_ct_exp_net(exp);
|
||||
struct nf_conntrack_net *cnet;
|
||||
|
||||
WARN_ON(!master_help);
|
||||
WARN_ON(timer_pending(&exp->timeout));
|
||||
|
||||
hlist_del_rcu(&exp->hnode);
|
||||
net->ct.expect_count--;
|
||||
|
||||
cnet = net_generic(net, nf_conntrack_net_id);
|
||||
cnet->expect_count--;
|
||||
|
||||
hlist_del_rcu(&exp->lnode);
|
||||
master_help->expecting[exp->class]--;
|
||||
|
@ -118,10 +123,11 @@ __nf_ct_expect_find(struct net *net,
|
|||
const struct nf_conntrack_zone *zone,
|
||||
const struct nf_conntrack_tuple *tuple)
|
||||
{
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
struct nf_conntrack_expect *i;
|
||||
unsigned int h;
|
||||
|
||||
if (!net->ct.expect_count)
|
||||
if (!cnet->expect_count)
|
||||
return NULL;
|
||||
|
||||
h = nf_ct_expect_dst_hash(net, tuple);
|
||||
|
@ -158,10 +164,11 @@ nf_ct_find_expectation(struct net *net,
|
|||
const struct nf_conntrack_zone *zone,
|
||||
const struct nf_conntrack_tuple *tuple)
|
||||
{
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
struct nf_conntrack_expect *i, *exp = NULL;
|
||||
unsigned int h;
|
||||
|
||||
if (!net->ct.expect_count)
|
||||
if (!cnet->expect_count)
|
||||
return NULL;
|
||||
|
||||
h = nf_ct_expect_dst_hash(net, tuple);
|
||||
|
@ -368,6 +375,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
|
|||
|
||||
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
|
||||
{
|
||||
struct nf_conntrack_net *cnet;
|
||||
struct nf_conn_help *master_help = nfct_help(exp->master);
|
||||
struct nf_conntrack_helper *helper;
|
||||
struct net *net = nf_ct_exp_net(exp);
|
||||
|
@ -389,7 +397,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
|
|||
master_help->expecting[exp->class]++;
|
||||
|
||||
hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
|
||||
net->ct.expect_count++;
|
||||
cnet = net_generic(net, nf_conntrack_net_id);
|
||||
cnet->expect_count++;
|
||||
|
||||
NF_CT_STAT_INC(net, expect_create);
|
||||
}
|
||||
|
@ -415,6 +424,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
|
|||
{
|
||||
const struct nf_conntrack_expect_policy *p;
|
||||
struct nf_conntrack_expect *i;
|
||||
struct nf_conntrack_net *cnet;
|
||||
struct nf_conn *master = expect->master;
|
||||
struct nf_conn_help *master_help = nfct_help(master);
|
||||
struct nf_conntrack_helper *helper;
|
||||
|
@ -458,7 +468,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
|
|||
}
|
||||
}
|
||||
|
||||
if (net->ct.expect_count >= nf_ct_expect_max) {
|
||||
cnet = net_generic(net, nf_conntrack_net_id);
|
||||
if (cnet->expect_count >= nf_ct_expect_max) {
|
||||
net_warn_ratelimited("nf_conntrack: expectation table full\n");
|
||||
ret = -EMFILE;
|
||||
}
|
||||
|
@ -686,7 +697,6 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
|
|||
|
||||
/*
 * Per-namespace initialisation of the expectation code.
 *
 * Only the procfs side is set up here. The expectation counter is no
 * longer a member of net->ct; it lives in struct nf_conntrack_net and is
 * accessed through net_generic(), so there is nothing to reset in
 * struct net any more.
 * NOTE(review): this relies on the net_generic area starting out zeroed -
 * confirm against the pernet_operations registration.
 *
 * Returns the result of exp_proc_init().
 */
int nf_conntrack_expect_pernet_init(struct net *net)
{
	return exp_proc_init(net);
}
|
||||
|
||||
|
|
|
@ -43,6 +43,8 @@ MODULE_PARM_DESC(nf_conntrack_helper,
|
|||
static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
|
||||
static struct list_head nf_ct_nat_helpers __read_mostly;
|
||||
|
||||
extern unsigned int nf_conntrack_net_id;
|
||||
|
||||
/* Stupid hash, but collision free for the default registrations of the
|
||||
* helpers currently in the kernel. */
|
||||
static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
|
||||
|
@ -212,8 +214,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
|
|||
static struct nf_conntrack_helper *
|
||||
nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
|
||||
{
|
||||
if (!net->ct.sysctl_auto_assign_helper) {
|
||||
if (net->ct.auto_assign_helper_warned)
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
|
||||
if (!cnet->sysctl_auto_assign_helper) {
|
||||
if (cnet->auto_assign_helper_warned)
|
||||
return NULL;
|
||||
if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple))
|
||||
return NULL;
|
||||
|
@ -221,7 +225,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
|
|||
"has been turned off for security reasons and CT-based "
|
||||
"firewall rule not found. Use the iptables CT target "
|
||||
"to attach helpers instead.\n");
|
||||
net->ct.auto_assign_helper_warned = 1;
|
||||
cnet->auto_assign_helper_warned = true;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -556,8 +560,9 @@ static const struct nf_ct_ext_type helper_extend = {
|
|||
|
||||
void nf_conntrack_helper_pernet_init(struct net *net)
|
||||
{
|
||||
net->ct.auto_assign_helper_warned = false;
|
||||
net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
|
||||
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
|
||||
|
||||
cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
|
||||
}
|
||||
|
||||
int nf_conntrack_helper_init(void)
|
||||
|
|
|
@ -2559,9 +2559,9 @@ static int
|
|||
ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
|
||||
struct net *net)
|
||||
{
|
||||
struct nlmsghdr *nlh;
|
||||
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
|
||||
unsigned int nr_conntracks = atomic_read(&net->ct.count);
|
||||
unsigned int nr_conntracks;
|
||||
struct nlmsghdr *nlh;
|
||||
|
||||
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
|
||||
nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
|
||||
|
@ -2569,6 +2569,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
|
|||
if (!nlh)
|
||||
goto nlmsg_failure;
|
||||
|
||||
nr_conntracks = nf_conntrack_count(net);
|
||||
if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
|
||||
goto nla_put_failure;
|
||||
|
||||
|
|
|
@ -31,20 +31,6 @@
|
|||
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
|
||||
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
|
||||
|
||||
/* "Be conservative in what you do,
|
||||
be liberal in what you accept from others."
|
||||
If it's non-zero, we mark only out of window RST segments as INVALID. */
|
||||
static int nf_ct_tcp_be_liberal __read_mostly = 0;
|
||||
|
||||
/* If it is set to zero, we disable picking up already established
|
||||
connections. */
|
||||
static int nf_ct_tcp_loose __read_mostly = 1;
|
||||
|
||||
/* Max number of the retransmitted packets without receiving an (acceptable)
|
||||
ACK from the destination. If this number is reached, a shorter timer
|
||||
will be started. */
|
||||
static int nf_ct_tcp_max_retrans __read_mostly = 3;
|
||||
|
||||
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
|
||||
closely. They're more complex. --RR */
|
||||
|
||||
|
@ -1436,9 +1422,23 @@ void nf_conntrack_tcp_init_net(struct net *net)
|
|||
* ->timeouts[0] contains 'new' timeout, like udp or icmp.
|
||||
*/
|
||||
tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
|
||||
tn->tcp_loose = nf_ct_tcp_loose;
|
||||
tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
|
||||
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
|
||||
|
||||
/* If it is set to zero, we disable picking up already established
|
||||
* connections.
|
||||
*/
|
||||
tn->tcp_loose = 1;
|
||||
|
||||
/* "Be conservative in what you do,
|
||||
* be liberal in what you accept from others."
|
||||
* If it's non-zero, we mark only out of window RST segments as INVALID.
|
||||
*/
|
||||
tn->tcp_be_liberal = 0;
|
||||
|
||||
/* Max number of the retransmitted packets without receiving an (acceptable)
|
||||
* ACK from the destination. If this number is reached, a shorter timer
|
||||
* will be started.
|
||||
*/
|
||||
tn->tcp_max_retrans = 3;
|
||||
}
|
||||
|
||||
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
|
||||
|
|
|
@ -425,14 +425,16 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
|
|||
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
|
||||
{
|
||||
struct net *net = seq_file_net(seq);
|
||||
unsigned int nr_conntracks = atomic_read(&net->ct.count);
|
||||
const struct ip_conntrack_stat *st = v;
|
||||
unsigned int nr_conntracks;
|
||||
|
||||
if (v == SEQ_START_TOKEN) {
|
||||
seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
nr_conntracks = nf_conntrack_count(net);
|
||||
|
||||
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
|
||||
"%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
|
||||
nr_conntracks,
|
||||
|
@ -508,13 +510,19 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
|
|||
}
|
||||
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
|
||||
|
||||
u32 nf_conntrack_count(const struct net *net)
|
||||
{
|
||||
const struct nf_conntrack_net *cnet;
|
||||
|
||||
cnet = net_generic(net, nf_conntrack_net_id);
|
||||
|
||||
return atomic_read(&cnet->count);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_conntrack_count);
|
||||
|
||||
/* Sysctl support */
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
/* Log invalid packets of a given protocol */
|
||||
static int log_invalid_proto_min __read_mostly;
|
||||
static int log_invalid_proto_max __read_mostly = 255;
|
||||
|
||||
/* size the user *wants to set */
|
||||
static unsigned int nf_conntrack_htable_size_user __read_mostly;
|
||||
|
||||
|
@ -615,7 +623,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
|
|||
},
|
||||
[NF_SYSCTL_CT_COUNT] = {
|
||||
.procname = "nf_conntrack_count",
|
||||
.data = &init_net.ct.count,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0444,
|
||||
.proc_handler = proc_dointvec,
|
||||
|
@ -630,20 +637,18 @@ static struct ctl_table nf_ct_sysctl_table[] = {
|
|||
[NF_SYSCTL_CT_CHECKSUM] = {
|
||||
.procname = "nf_conntrack_checksum",
|
||||
.data = &init_net.ct.sysctl_checksum,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
[NF_SYSCTL_CT_LOG_INVALID] = {
|
||||
.procname = "nf_conntrack_log_invalid",
|
||||
.data = &init_net.ct.sysctl_log_invalid,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &log_invalid_proto_min,
|
||||
.extra2 = &log_invalid_proto_max,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
},
|
||||
[NF_SYSCTL_CT_EXPECT_MAX] = {
|
||||
.procname = "nf_conntrack_expect_max",
|
||||
|
@ -655,18 +660,17 @@ static struct ctl_table nf_ct_sysctl_table[] = {
|
|||
[NF_SYSCTL_CT_ACCT] = {
|
||||
.procname = "nf_conntrack_acct",
|
||||
.data = &init_net.ct.sysctl_acct,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
[NF_SYSCTL_CT_HELPER] = {
|
||||
.procname = "nf_conntrack_helper",
|
||||
.data = &init_net.ct.sysctl_auto_assign_helper,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
|
@ -674,9 +678,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
|
|||
[NF_SYSCTL_CT_EVENTS] = {
|
||||
.procname = "nf_conntrack_events",
|
||||
.data = &init_net.ct.sysctl_events,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
|
@ -685,9 +689,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
|
|||
[NF_SYSCTL_CT_TIMESTAMP] = {
|
||||
.procname = "nf_conntrack_timestamp",
|
||||
.data = &init_net.ct.sysctl_tstamp,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
|
@ -760,25 +764,25 @@ static struct ctl_table nf_ct_sysctl_table[] = {
|
|||
},
|
||||
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
|
||||
.procname = "nf_conntrack_tcp_loose",
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
[NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = {
|
||||
.procname = "nf_conntrack_tcp_be_liberal",
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
|
||||
.procname = "nf_conntrack_tcp_max_retrans",
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
},
|
||||
[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = {
|
||||
.procname = "nf_conntrack_udp_timeout",
|
||||
|
@ -905,9 +909,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
|
|||
},
|
||||
[NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
|
||||
.procname = "nf_conntrack_dccp_loose",
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(u8),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.proc_handler = proc_dou8vec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
|
@ -1039,11 +1043,11 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
|
|||
if (!table)
|
||||
return -ENOMEM;
|
||||
|
||||
table[NF_SYSCTL_CT_COUNT].data = &net->ct.count;
|
||||
table[NF_SYSCTL_CT_COUNT].data = &cnet->count;
|
||||
table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
|
||||
table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
|
||||
table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
|
||||
table[NF_SYSCTL_CT_HELPER].data = &net->ct.sysctl_auto_assign_helper;
|
||||
table[NF_SYSCTL_CT_HELPER].data = &cnet->sysctl_auto_assign_helper;
|
||||
#ifdef CONFIG_NF_CONNTRACK_EVENTS
|
||||
table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
|
||||
#endif
|
||||
|
|
|
@ -130,6 +130,9 @@ static int flow_offload_fill_route(struct flow_offload *flow,
|
|||
flow_tuple->dst_cache = dst;
|
||||
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
}
|
||||
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
|
||||
|
||||
|
|
|
@ -78,6 +78,16 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
|
|||
match->dissector.used_keys |= enc_keys;
|
||||
}
|
||||
|
||||
static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
|
||||
struct flow_dissector_key_vlan *mask,
|
||||
u16 vlan_id, __be16 proto)
|
||||
{
|
||||
key->vlan_id = vlan_id;
|
||||
mask->vlan_id = VLAN_VID_MASK;
|
||||
key->vlan_tpid = proto;
|
||||
mask->vlan_tpid = 0xffff;
|
||||
}
|
||||
|
||||
static int nf_flow_rule_match(struct nf_flow_match *match,
|
||||
const struct flow_offload_tuple *tuple,
|
||||
struct dst_entry *other_dst)
|
||||
|
@ -85,6 +95,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
|
|||
struct nf_flow_key *mask = &match->mask;
|
||||
struct nf_flow_key *key = &match->key;
|
||||
struct ip_tunnel_info *tun_info;
|
||||
bool vlan_encap = false;
|
||||
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
|
||||
|
@ -102,6 +113,32 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
|
|||
key->meta.ingress_ifindex = tuple->iifidx;
|
||||
mask->meta.ingress_ifindex = 0xffffffff;
|
||||
|
||||
if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
|
||||
tuple->encap[0].proto == htons(ETH_P_8021Q)) {
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
|
||||
nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
|
||||
tuple->encap[0].id,
|
||||
tuple->encap[0].proto);
|
||||
vlan_encap = true;
|
||||
}
|
||||
|
||||
if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
|
||||
tuple->encap[1].proto == htons(ETH_P_8021Q)) {
|
||||
if (vlan_encap) {
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
|
||||
cvlan);
|
||||
nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
|
||||
tuple->encap[1].id,
|
||||
tuple->encap[1].proto);
|
||||
} else {
|
||||
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
|
||||
vlan);
|
||||
nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
|
||||
tuple->encap[1].id,
|
||||
tuple->encap[1].proto);
|
||||
}
|
||||
}
|
||||
|
||||
switch (tuple->l3proto) {
|
||||
case AF_INET:
|
||||
key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
|
||||
|
@ -582,6 +619,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
|
|||
struct nf_flow_rule *flow_rule)
|
||||
{
|
||||
const struct flow_offload_tuple *other_tuple;
|
||||
const struct flow_offload_tuple *tuple;
|
||||
int i;
|
||||
|
||||
flow_offload_decap_tunnel(flow, dir, flow_rule);
|
||||
|
@ -591,6 +629,20 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
|
|||
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
|
||||
return -1;
|
||||
|
||||
tuple = &flow->tuplehash[dir].tuple;
|
||||
|
||||
for (i = 0; i < tuple->encap_num; i++) {
|
||||
struct flow_action_entry *entry;
|
||||
|
||||
if (tuple->in_vlan_ingress & BIT(i))
|
||||
continue;
|
||||
|
||||
if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
|
||||
entry = flow_action_entry_next(flow_rule);
|
||||
entry->id = FLOW_ACTION_VLAN_POP;
|
||||
}
|
||||
}
|
||||
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
|
||||
for (i = 0; i < other_tuple->encap_num; i++) {
|
||||
|
|
|
@ -2878,6 +2878,9 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
|
|||
goto nla_put_failure;
|
||||
}
|
||||
|
||||
if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
|
||||
nft_flow_rule_stats(chain, rule);
|
||||
|
||||
list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS);
|
||||
if (list == NULL)
|
||||
goto nla_put_failure;
|
||||
|
|
|
@ -47,6 +47,48 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
|
|||
offsetof(struct nft_flow_key, control);
|
||||
}
|
||||
|
||||
struct nft_offload_ethertype {
|
||||
__be16 value;
|
||||
__be16 mask;
|
||||
};
|
||||
|
||||
static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
|
||||
struct nft_flow_rule *flow)
|
||||
{
|
||||
struct nft_flow_match *match = &flow->match;
|
||||
struct nft_offload_ethertype ethertype;
|
||||
|
||||
if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL) &&
|
||||
match->key.basic.n_proto != htons(ETH_P_8021Q) &&
|
||||
match->key.basic.n_proto != htons(ETH_P_8021AD))
|
||||
return;
|
||||
|
||||
ethertype.value = match->key.basic.n_proto;
|
||||
ethertype.mask = match->mask.basic.n_proto;
|
||||
|
||||
if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
|
||||
(match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
|
||||
match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
|
||||
match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
|
||||
match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid;
|
||||
match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid;
|
||||
match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid;
|
||||
match->key.vlan.vlan_tpid = ethertype.value;
|
||||
match->mask.vlan.vlan_tpid = ethertype.mask;
|
||||
match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
|
||||
offsetof(struct nft_flow_key, cvlan);
|
||||
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
|
||||
} else {
|
||||
match->key.basic.n_proto = match->key.vlan.vlan_tpid;
|
||||
match->mask.basic.n_proto = match->mask.vlan.vlan_tpid;
|
||||
match->key.vlan.vlan_tpid = ethertype.value;
|
||||
match->mask.vlan.vlan_tpid = ethertype.mask;
|
||||
match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
|
||||
offsetof(struct nft_flow_key, vlan);
|
||||
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
|
||||
}
|
||||
}
|
||||
|
||||
struct nft_flow_rule *nft_flow_rule_create(struct net *net,
|
||||
const struct nft_rule *rule)
|
||||
{
|
||||
|
@ -91,6 +133,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
|
|||
|
||||
expr = nft_expr_next(expr);
|
||||
}
|
||||
nft_flow_rule_transfer_vlan(ctx, flow);
|
||||
|
||||
flow->proto = ctx->dep.l3num;
|
||||
kfree(ctx);
|
||||
|
||||
|
@ -199,26 +243,56 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
|
|||
cls_flow->rule = flow->rule;
|
||||
}
|
||||
|
||||
static int nft_flow_offload_rule(struct nft_chain *chain,
|
||||
struct nft_rule *rule,
|
||||
struct nft_flow_rule *flow,
|
||||
enum flow_cls_command command)
|
||||
static int nft_flow_offload_cmd(const struct nft_chain *chain,
|
||||
const struct nft_rule *rule,
|
||||
struct nft_flow_rule *flow,
|
||||
enum flow_cls_command command,
|
||||
struct flow_cls_offload *cls_flow)
|
||||
{
|
||||
struct netlink_ext_ack extack = {};
|
||||
struct flow_cls_offload cls_flow;
|
||||
struct nft_base_chain *basechain;
|
||||
|
||||
if (!nft_is_base_chain(chain))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
basechain = nft_base_chain(chain);
|
||||
nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, &extack,
|
||||
nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack,
|
||||
command);
|
||||
|
||||
return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
|
||||
return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow,
|
||||
&basechain->flow_block.cb_list);
|
||||
}
|
||||
|
||||
static int nft_flow_offload_rule(const struct nft_chain *chain,
|
||||
struct nft_rule *rule,
|
||||
struct nft_flow_rule *flow,
|
||||
enum flow_cls_command command)
|
||||
{
|
||||
struct flow_cls_offload cls_flow;
|
||||
|
||||
return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow);
|
||||
}
|
||||
|
||||
int nft_flow_rule_stats(const struct nft_chain *chain,
|
||||
const struct nft_rule *rule)
|
||||
{
|
||||
struct flow_cls_offload cls_flow = {};
|
||||
struct nft_expr *expr, *next;
|
||||
int err;
|
||||
|
||||
err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS,
|
||||
&cls_flow);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
nft_rule_for_each_expr(expr, next, rule) {
|
||||
if (expr->ops->offload_stats)
|
||||
expr->ops->offload_stats(expr, &cls_flow.stats);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nft_flow_offload_bind(struct flow_block_offload *bo,
|
||||
struct nft_base_chain *basechain)
|
||||
{
|
||||
|
|
|
@ -114,19 +114,56 @@ static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
|
|||
return -1;
|
||||
}
|
||||
|
||||
union nft_cmp_offload_data {
|
||||
u16 val16;
|
||||
u32 val32;
|
||||
u64 val64;
|
||||
};
|
||||
|
||||
static void nft_payload_n2h(union nft_cmp_offload_data *data,
|
||||
const u8 *val, u32 len)
|
||||
{
|
||||
switch (len) {
|
||||
case 2:
|
||||
data->val16 = ntohs(*((u16 *)val));
|
||||
break;
|
||||
case 4:
|
||||
data->val32 = ntohl(*((u32 *)val));
|
||||
break;
|
||||
case 8:
|
||||
data->val64 = be64_to_cpu(*((u64 *)val));
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
|
||||
struct nft_flow_rule *flow,
|
||||
const struct nft_cmp_expr *priv)
|
||||
{
|
||||
struct nft_offload_reg *reg = &ctx->regs[priv->sreg];
|
||||
union nft_cmp_offload_data _data, _datamask;
|
||||
u8 *mask = (u8 *)&flow->match.mask;
|
||||
u8 *key = (u8 *)&flow->match.key;
|
||||
u8 *data, *datamask;
|
||||
|
||||
if (priv->op != NFT_CMP_EQ || priv->len > reg->len)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
memcpy(key + reg->offset, &priv->data, reg->len);
|
||||
memcpy(mask + reg->offset, &reg->mask, reg->len);
|
||||
if (reg->flags & NFT_OFFLOAD_F_NETWORK2HOST) {
|
||||
nft_payload_n2h(&_data, (u8 *)&priv->data, reg->len);
|
||||
nft_payload_n2h(&_datamask, (u8 *)&reg->mask, reg->len);
|
||||
data = (u8 *)&_data;
|
||||
datamask = (u8 *)&_datamask;
|
||||
} else {
|
||||
data = (u8 *)&priv->data;
|
||||
datamask = (u8 *)&reg->mask;
|
||||
}
|
||||
|
||||
memcpy(key + reg->offset, data, reg->len);
|
||||
memcpy(mask + reg->offset, datamask, reg->len);
|
||||
|
||||
flow->match.dissector.used_keys |= BIT(reg->key);
|
||||
flow->match.dissector.offset[reg->key] = reg->base_offset;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <linux/netfilter.h>
|
||||
#include <linux/netfilter/nf_tables.h>
|
||||
#include <net/netfilter/nf_tables.h>
|
||||
#include <net/netfilter/nf_tables_offload.h>
|
||||
|
||||
struct nft_counter {
|
||||
s64 bytes;
|
||||
|
@ -248,6 +249,32 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Offload hook for the counter expression.  A counter adds nothing to the
 * flow rule itself, so this only reports success (0); none of the arguments
 * are used.
 */
static int nft_counter_offload(struct nft_offload_ctx *ctx,
			       struct nft_flow_rule *flow,
			       const struct nft_expr *expr)
{
	return 0;
}
|
||||
|
||||
static void nft_counter_offload_stats(struct nft_expr *expr,
|
||||
const struct flow_stats *stats)
|
||||
{
|
||||
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
|
||||
struct nft_counter *this_cpu;
|
||||
seqcount_t *myseq;
|
||||
|
||||
preempt_disable();
|
||||
this_cpu = this_cpu_ptr(priv->counter);
|
||||
myseq = this_cpu_ptr(&nft_counter_seq);
|
||||
|
||||
write_seqcount_begin(myseq);
|
||||
this_cpu->packets += stats->pkts;
|
||||
this_cpu->bytes += stats->bytes;
|
||||
write_seqcount_end(myseq);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static struct nft_expr_type nft_counter_type;
|
||||
static const struct nft_expr_ops nft_counter_ops = {
|
||||
.type = &nft_counter_type,
|
||||
|
@ -258,6 +285,8 @@ static const struct nft_expr_ops nft_counter_ops = {
|
|||
.destroy_clone = nft_counter_destroy,
|
||||
.dump = nft_counter_dump,
|
||||
.clone = nft_counter_clone,
|
||||
.offload = nft_counter_offload,
|
||||
.offload_stats = nft_counter_offload_stats,
|
||||
};
|
||||
|
||||
static struct nft_expr_type nft_counter_type __read_mostly = {
|
||||
|
|
|
@ -226,8 +226,9 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
|
|||
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
|
||||
vlan_tci, sizeof(__be16), reg);
|
||||
NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_VLAN, vlan,
|
||||
vlan_tci, sizeof(__be16), reg,
|
||||
NFT_OFFLOAD_F_NETWORK2HOST);
|
||||
break;
|
||||
case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto):
|
||||
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
|
||||
|
@ -241,16 +242,18 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
|
|||
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
|
||||
vlan_tci, sizeof(__be16), reg);
|
||||
NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
|
||||
vlan_tci, sizeof(__be16), reg,
|
||||
NFT_OFFLOAD_F_NETWORK2HOST);
|
||||
break;
|
||||
case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
|
||||
sizeof(struct vlan_hdr):
|
||||
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
|
||||
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
|
||||
vlan_tpid, sizeof(__be16), reg);
|
||||
nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
|
|
|
@ -9,7 +9,7 @@ ret=0
|
|||
ksft_skip=4
|
||||
|
||||
# all tests in this script. Can be overridden with -t option
|
||||
TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr"
|
||||
TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle"
|
||||
|
||||
VERBOSE=0
|
||||
PAUSE_ON_FAIL=no
|
||||
|
@ -1653,6 +1653,154 @@ ipv4_route_v6_gw_test()
|
|||
route_cleanup
|
||||
}
|
||||
|
||||
# Return 0 when an executable socat is available on PATH; otherwise print a
# skip notice and return 1.
socat_check()
{
	if [ -x "$(command -v socat)" ]; then
		return 0
	fi

	echo "socat command not found. Skipping test"
	return 1
}
|
||||
|
||||
# Return 0 when the mangle table's OUTPUT chain can be listed with iptables;
# otherwise print a skip notice and return 1.  All iptables output is
# discarded.
iptables_check()
{
	if iptables -t mangle -L OUTPUT &> /dev/null; then
		return 0
	fi

	echo "iptables configuration not supported. Skipping test"
	return 1
}
|
||||
|
||||
# Return 0 when the mangle table's OUTPUT chain can be listed with ip6tables;
# otherwise print a skip notice and return 1.  All ip6tables output is
# discarded.
ip6tables_check()
{
	if ip6tables -t mangle -L OUTPUT &> /dev/null; then
		return 0
	fi

	echo "ip6tables configuration not supported. Skipping test"
	return 1
}
|
||||
|
||||
# IPv4 mangling test.
#
# Starts a UDP listener in ns2 that appends received data to a temporary
# file, installs a FIB rule keyed on protocol/source port/destination port
# plus an unreachable route in the main table, and then checks that:
#   - a connection matching the rule succeeds,
#   - a connection with a different source port fails,
#   - the matching connection still succeeds with an iptables mangle rule
#     installed and again after that rule is removed,
#   - the listener recorded exactly three lines.
# Returns 1 early (after a skip notice from the helper) when socat or the
# iptables mangle table is unavailable.
ipv4_mangle_test()
{
	local rc
	local tmp_file

	echo
	echo "IPv4 mangling tests"

	socat_check || return 1
	iptables_check || return 1

	route_setup
	sleep 2

	# Assigned separately from "local" so mktemp's exit status is not
	# masked; quoted everywhere so an unusual TMPDIR cannot split the path.
	tmp_file=$(mktemp)
	ip netns exec ns2 socat UDP4-LISTEN:54321,fork "$tmp_file" &

	# Add a FIB rule and a route that will direct our connection to the
	# listening server.
	$IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
	$IP route add table 123 172.16.101.0/24 dev veth1

	# Add an unreachable route to the main table that will block our
	# connection in case the FIB rule is not hit.
	$IP route add unreachable 172.16.101.2/32

	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
	log_test $? 0 " Connection with correct parameters"

	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111"
	log_test $? 1 " Connection with incorrect parameters"

	# Add a mangling rule and make sure connection is still successful.
	$NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1

	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
	log_test $? 0 " Connection with correct parameters - mangling"

	# Delete the mangling rule and make sure connection is still
	# successful.
	$NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1

	run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
	log_test $? 0 " Connection with correct parameters - no mangling"

	# Verify connections were indeed successful on server side: one line
	# per successful client run.
	[[ $(wc -l < "$tmp_file") -eq 3 ]]
	log_test $? 0 " Connection check - server side"

	$IP route del unreachable 172.16.101.2/32
	$IP route del table 123 172.16.101.0/24 dev veth1
	$IP rule del pref 100

	# Stop the background listener and reap it quietly.
	{ kill %% && wait %%; } 2>/dev/null
	rm "$tmp_file"

	route_cleanup
}
|
||||
|
||||
# IPv6 mangling test.
#
# Starts a UDP listener in ns2 that appends received data to a temporary
# file, installs an IPv6 FIB rule keyed on protocol/source port/destination
# port plus an unreachable route in the main table, and then checks that:
#   - a connection matching the rule succeeds,
#   - a connection with a different source port fails,
#   - the matching connection still succeeds with an ip6tables mangle rule
#     installed and again after that rule is removed,
#   - the listener recorded exactly three lines.
# Returns 1 early (after a skip notice from the helper) when socat or the
# ip6tables mangle table is unavailable.
ipv6_mangle_test()
{
	local rc
	local tmp_file

	echo
	echo "IPv6 mangling tests"

	socat_check || return 1
	ip6tables_check || return 1

	route_setup
	sleep 2

	# Assigned separately from "local" so mktemp's exit status is not
	# masked; quoted everywhere so an unusual TMPDIR cannot split the path.
	tmp_file=$(mktemp)
	ip netns exec ns2 socat UDP6-LISTEN:54321,fork "$tmp_file" &

	# Add a FIB rule and a route that will direct our connection to the
	# listening server.
	$IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
	$IP -6 route add table 123 2001:db8:101::/64 dev veth1

	# Add an unreachable route to the main table that will block our
	# connection in case the FIB rule is not hit.
	$IP -6 route add unreachable 2001:db8:101::2/128

	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
	log_test $? 0 " Connection with correct parameters"

	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111"
	log_test $? 1 " Connection with incorrect parameters"

	# Add a mangling rule and make sure connection is still successful.
	$NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1

	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
	log_test $? 0 " Connection with correct parameters - mangling"

	# Delete the mangling rule and make sure connection is still
	# successful.
	$NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1

	run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
	log_test $? 0 " Connection with correct parameters - no mangling"

	# Verify connections were indeed successful on server side: one line
	# per successful client run.
	[[ $(wc -l < "$tmp_file") -eq 3 ]]
	log_test $? 0 " Connection check - server side"

	$IP -6 route del unreachable 2001:db8:101::2/128
	$IP -6 route del table 123 2001:db8:101::/64 dev veth1
	$IP -6 rule del pref 100

	# Stop the background listener and reap it quietly.
	{ kill %% && wait %%; } 2>/dev/null
	rm "$tmp_file"

	route_cleanup
}
|
||||
|
||||
################################################################################
|
||||
# usage
|
||||
|
||||
|
@ -1725,6 +1873,8 @@ do
|
|||
ipv6_route_metrics) ipv6_route_metrics_test;;
|
||||
ipv4_route_metrics) ipv4_route_metrics_test;;
|
||||
ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
|
||||
ipv4_mangle) ipv4_mangle_test;;
|
||||
ipv6_mangle) ipv6_mangle_test;;
|
||||
|
||||
help) echo "Test names: $TESTS"; exit 0;;
|
||||
esac
|
||||
|
|
Loading…
Reference in New Issue