mirror of https://gitee.com/openkylin/linux.git
netfilter: push reasm skb through instead of original frag skbs
Pushing original fragments through causes several problems. For example for matching, frags may not be matched correctly. Take following example: <example> On HOSTA do: ip6tables -I INPUT -p icmpv6 -j DROP ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT and on HOSTB you do: ping6 HOSTA -s2000 (MTU is 1500) Incoming echo requests will be filtered out on HOSTA. This issue does not occur with smaller packets than MTU (where fragmentation does not happen) </example> As was discussed previously, the only correct solution seems to be to use reassembled skb instead of separete frags. Doing this has positive side effects in reducing sk_buff by one pointer (nfct_reasm) and also the reams dances in ipvs and conntrack can be removed. Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c entirely and use code in net/ipv6/reassembly.c instead. Signed-off-by: Jiri Pirko <jiri@resnulli.us> Acked-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
9037c3579a
commit
6aafeef03b
|
@ -337,11 +337,6 @@ typedef unsigned int sk_buff_data_t;
|
|||
typedef unsigned char *sk_buff_data_t;
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
|
||||
defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
|
||||
#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* struct sk_buff - socket buffer
|
||||
* @next: Next buffer in list
|
||||
|
@ -374,7 +369,6 @@ typedef unsigned char *sk_buff_data_t;
|
|||
* @protocol: Packet protocol from driver
|
||||
* @destructor: Destruct function
|
||||
* @nfct: Associated connection, if any
|
||||
* @nfct_reasm: netfilter conntrack re-assembly pointer
|
||||
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
|
||||
* @skb_iif: ifindex of device we arrived on
|
||||
* @tc_index: Traffic control index
|
||||
|
@ -463,9 +457,6 @@ struct sk_buff {
|
|||
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
||||
struct nf_conntrack *nfct;
|
||||
#endif
|
||||
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
|
||||
struct sk_buff *nfct_reasm;
|
||||
#endif
|
||||
#ifdef CONFIG_BRIDGE_NETFILTER
|
||||
struct nf_bridge_info *nf_bridge;
|
||||
#endif
|
||||
|
@ -2595,18 +2586,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
|
|||
atomic_inc(&nfct->use);
|
||||
}
|
||||
#endif
|
||||
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
|
||||
static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
|
||||
{
|
||||
if (skb)
|
||||
atomic_inc(&skb->users);
|
||||
}
|
||||
static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
|
||||
{
|
||||
if (skb)
|
||||
kfree_skb(skb);
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_BRIDGE_NETFILTER
|
||||
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
|
||||
{
|
||||
|
@ -2625,10 +2604,6 @@ static inline void nf_reset(struct sk_buff *skb)
|
|||
nf_conntrack_put(skb->nfct);
|
||||
skb->nfct = NULL;
|
||||
#endif
|
||||
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
|
||||
nf_conntrack_put_reasm(skb->nfct_reasm);
|
||||
skb->nfct_reasm = NULL;
|
||||
#endif
|
||||
#ifdef CONFIG_BRIDGE_NETFILTER
|
||||
nf_bridge_put(skb->nf_bridge);
|
||||
skb->nf_bridge = NULL;
|
||||
|
@ -2650,10 +2625,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
|
|||
nf_conntrack_get(src->nfct);
|
||||
dst->nfctinfo = src->nfctinfo;
|
||||
#endif
|
||||
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
|
||||
dst->nfct_reasm = src->nfct_reasm;
|
||||
nf_conntrack_get_reasm(src->nfct_reasm);
|
||||
#endif
|
||||
#ifdef CONFIG_BRIDGE_NETFILTER
|
||||
dst->nf_bridge = src->nf_bridge;
|
||||
nf_bridge_get(src->nf_bridge);
|
||||
|
@ -2665,9 +2636,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
|
|||
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
|
||||
nf_conntrack_put(dst->nfct);
|
||||
#endif
|
||||
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
|
||||
nf_conntrack_put_reasm(dst->nfct_reasm);
|
||||
#endif
|
||||
#ifdef CONFIG_BRIDGE_NETFILTER
|
||||
nf_bridge_put(dst->nf_bridge);
|
||||
#endif
|
||||
|
|
|
@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size;
|
|||
struct ip_vs_iphdr {
|
||||
__u32 len; /* IPv4 simply where L4 starts
|
||||
IPv6 where L4 Transport Header starts */
|
||||
__u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
|
||||
__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
|
||||
__s16 protocol;
|
||||
__s32 flags;
|
||||
|
@ -117,34 +116,12 @@ struct ip_vs_iphdr {
|
|||
union nf_inet_addr daddr;
|
||||
};
|
||||
|
||||
/* Dependency to module: nf_defrag_ipv6 */
|
||||
#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
|
||||
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
|
||||
{
|
||||
return skb->nfct_reasm;
|
||||
}
|
||||
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
|
||||
int len, void *buffer,
|
||||
const struct ip_vs_iphdr *ipvsh)
|
||||
{
|
||||
if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
|
||||
return skb_header_pointer(skb_nfct_reasm(skb),
|
||||
ipvsh->thoff_reasm, len, buffer);
|
||||
|
||||
return skb_header_pointer(skb, offset, len, buffer);
|
||||
}
|
||||
#else
|
||||
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
|
||||
int len, void *buffer,
|
||||
const struct ip_vs_iphdr *ipvsh)
|
||||
{
|
||||
return skb_header_pointer(skb, offset, len, buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
|
||||
|
@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
|
|||
(struct ipv6hdr *)skb_network_header(skb);
|
||||
iphdr->saddr.in6 = iph->saddr;
|
||||
iphdr->daddr.in6 = iph->daddr;
|
||||
/* ipv6_find_hdr() updates len, flags, thoff_reasm */
|
||||
iphdr->thoff_reasm = 0;
|
||||
/* ipv6_find_hdr() updates len, flags */
|
||||
iphdr->len = 0;
|
||||
iphdr->flags = 0;
|
||||
iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1,
|
||||
&iphdr->fragoffs,
|
||||
&iphdr->flags);
|
||||
/* get proto from re-assembled packet and it's offset */
|
||||
if (skb_nfct_reasm(skb))
|
||||
iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
|
||||
&iphdr->thoff_reasm,
|
||||
-1, NULL, NULL);
|
||||
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
|
|
|
@ -6,9 +6,7 @@ void nf_defrag_ipv6_enable(void);
|
|||
int nf_ct_frag6_init(void);
|
||||
void nf_ct_frag6_cleanup(void);
|
||||
struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
|
||||
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
|
||||
struct net_device *in, struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *));
|
||||
void nf_ct_frag6_consume_orig(struct sk_buff *skb);
|
||||
|
||||
struct inet_frags_ctl;
|
||||
|
||||
|
|
|
@ -592,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb)
|
|||
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
|
||||
nf_conntrack_put(skb->nfct);
|
||||
#endif
|
||||
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
|
||||
nf_conntrack_put_reasm(skb->nfct_reasm);
|
||||
#endif
|
||||
#ifdef CONFIG_BRIDGE_NETFILTER
|
||||
nf_bridge_put(skb->nf_bridge);
|
||||
#endif
|
||||
|
|
|
@ -169,64 +169,13 @@ static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
|
|||
return nf_conntrack_confirm(skb);
|
||||
}
|
||||
|
||||
static unsigned int __ipv6_conntrack_in(struct net *net,
|
||||
unsigned int hooknum,
|
||||
struct sk_buff *skb,
|
||||
const struct net_device *in,
|
||||
const struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
{
|
||||
struct sk_buff *reasm = skb->nfct_reasm;
|
||||
const struct nf_conn_help *help;
|
||||
struct nf_conn *ct;
|
||||
enum ip_conntrack_info ctinfo;
|
||||
|
||||
/* This packet is fragmented and has reassembled packet. */
|
||||
if (reasm) {
|
||||
/* Reassembled packet isn't parsed yet ? */
|
||||
if (!reasm->nfct) {
|
||||
unsigned int ret;
|
||||
|
||||
ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
|
||||
if (ret != NF_ACCEPT)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Conntrack helpers need the entire reassembled packet in the
|
||||
* POST_ROUTING hook. In case of unconfirmed connections NAT
|
||||
* might reassign a helper, so the entire packet is also
|
||||
* required.
|
||||
*/
|
||||
ct = nf_ct_get(reasm, &ctinfo);
|
||||
if (ct != NULL && !nf_ct_is_untracked(ct)) {
|
||||
help = nfct_help(ct);
|
||||
if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
|
||||
nf_conntrack_get_reasm(reasm);
|
||||
NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
|
||||
(struct net_device *)in,
|
||||
(struct net_device *)out,
|
||||
okfn, NF_IP6_PRI_CONNTRACK + 1);
|
||||
return NF_DROP_ERR(-ECANCELED);
|
||||
}
|
||||
}
|
||||
|
||||
nf_conntrack_get(reasm->nfct);
|
||||
skb->nfct = reasm->nfct;
|
||||
skb->nfctinfo = reasm->nfctinfo;
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
return nf_conntrack_in(net, PF_INET6, hooknum, skb);
|
||||
}
|
||||
|
||||
static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
|
||||
struct sk_buff *skb,
|
||||
const struct net_device *in,
|
||||
const struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
{
|
||||
return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
|
||||
okfn);
|
||||
return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
|
||||
}
|
||||
|
||||
static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
|
||||
|
@ -240,8 +189,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
|
|||
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
|
||||
okfn);
|
||||
return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
|
||||
}
|
||||
|
||||
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
|
||||
|
|
|
@ -633,31 +633,16 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
|
|||
return skb;
|
||||
}
|
||||
|
||||
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
|
||||
struct net_device *in, struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
void nf_ct_frag6_consume_orig(struct sk_buff *skb)
|
||||
{
|
||||
struct sk_buff *s, *s2;
|
||||
unsigned int ret = 0;
|
||||
|
||||
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
|
||||
nf_conntrack_put_reasm(s->nfct_reasm);
|
||||
nf_conntrack_get_reasm(skb);
|
||||
s->nfct_reasm = skb;
|
||||
|
||||
s2 = s->next;
|
||||
s->next = NULL;
|
||||
|
||||
if (ret != -ECANCELED)
|
||||
ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
|
||||
in, out, okfn,
|
||||
NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
|
||||
else
|
||||
kfree_skb(s);
|
||||
|
||||
consume_skb(s);
|
||||
s = s2;
|
||||
}
|
||||
nf_conntrack_put_reasm(skb);
|
||||
}
|
||||
|
||||
static int nf_ct_net_init(struct net *net)
|
||||
|
|
|
@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
|
|||
if (reasm == skb)
|
||||
return NF_ACCEPT;
|
||||
|
||||
nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
|
||||
(struct net_device *)out, okfn);
|
||||
nf_ct_frag6_consume_orig(reasm);
|
||||
|
||||
NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm,
|
||||
(struct net_device *) in, (struct net_device *) out,
|
||||
okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
|
||||
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
|
|
@ -1139,12 +1139,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
|
|||
ip_vs_fill_iph_skb(af, skb, &iph);
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
if (af == AF_INET6) {
|
||||
if (!iph.fragoffs && skb_nfct_reasm(skb)) {
|
||||
struct sk_buff *reasm = skb_nfct_reasm(skb);
|
||||
/* Save fw mark for coming frags */
|
||||
reasm->ipvs_property = 1;
|
||||
reasm->mark = skb->mark;
|
||||
}
|
||||
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
|
||||
int related;
|
||||
int verdict = ip_vs_out_icmp_v6(skb, &related,
|
||||
|
@ -1614,12 +1608,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
|
|||
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
if (af == AF_INET6) {
|
||||
if (!iph.fragoffs && skb_nfct_reasm(skb)) {
|
||||
struct sk_buff *reasm = skb_nfct_reasm(skb);
|
||||
/* Save fw mark for coming frags. */
|
||||
reasm->ipvs_property = 1;
|
||||
reasm->mark = skb->mark;
|
||||
}
|
||||
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
|
||||
int related;
|
||||
int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
|
||||
|
@ -1671,9 +1659,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
|
|||
/* sorry, all this trouble for a no-hit :) */
|
||||
IP_VS_DBG_PKT(12, af, pp, skb, 0,
|
||||
"ip_vs_in: packet continues traversal as normal");
|
||||
if (iph.fragoffs && !skb_nfct_reasm(skb)) {
|
||||
if (iph.fragoffs) {
|
||||
/* Fragment that couldn't be mapped to a conn entry
|
||||
* and don't have any pointer to a reasm skb
|
||||
* is missing module nf_defrag_ipv6
|
||||
*/
|
||||
IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
|
||||
|
@ -1755,38 +1742,6 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
|
|||
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
|
||||
/*
|
||||
* AF_INET6 fragment handling
|
||||
* Copy info from first fragment, to the rest of them.
|
||||
*/
|
||||
static unsigned int
|
||||
ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
|
||||
const struct net_device *in,
|
||||
const struct net_device *out,
|
||||
int (*okfn)(struct sk_buff *))
|
||||
{
|
||||
struct sk_buff *reasm = skb_nfct_reasm(skb);
|
||||
struct net *net;
|
||||
|
||||
/* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
|
||||
* ipvs_property is set when checking first fragment
|
||||
* in ip_vs_in() and ip_vs_out().
|
||||
*/
|
||||
if (reasm)
|
||||
IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
|
||||
if (!reasm || !reasm->ipvs_property)
|
||||
return NF_ACCEPT;
|
||||
|
||||
net = skb_net(skb);
|
||||
if (!net_ipvs(net)->enable)
|
||||
return NF_ACCEPT;
|
||||
|
||||
/* Copy stored fw mark, saved in ip_vs_{in,out} */
|
||||
skb->mark = reasm->mark;
|
||||
|
||||
return NF_ACCEPT;
|
||||
}
|
||||
|
||||
/*
|
||||
* AF_INET6 handler in NF_INET_LOCAL_IN chain
|
||||
* Schedule and forward packets from remote clients
|
||||
|
@ -1924,14 +1879,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
|
|||
.priority = 100,
|
||||
},
|
||||
#ifdef CONFIG_IP_VS_IPV6
|
||||
/* After mangle & nat fetch 2:nd fragment and following */
|
||||
{
|
||||
.hook = ip_vs_preroute_frag6,
|
||||
.owner = THIS_MODULE,
|
||||
.pf = NFPROTO_IPV6,
|
||||
.hooknum = NF_INET_PRE_ROUTING,
|
||||
.priority = NF_IP6_PRI_NAT_DST + 1,
|
||||
},
|
||||
/* After packet filtering, change source only for VS/NAT */
|
||||
{
|
||||
.hook = ip_vs_reply6,
|
||||
|
|
|
@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff,
|
|||
static int
|
||||
ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
|
||||
{
|
||||
struct sk_buff *reasm = skb_nfct_reasm(skb);
|
||||
struct ip_vs_iphdr iph;
|
||||
unsigned int dataoff, datalen, matchoff, matchlen;
|
||||
const char *dptr;
|
||||
|
@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
|
|||
/* todo: IPv6 fragments:
|
||||
* I think this only should be done for the first fragment. /HS
|
||||
*/
|
||||
if (reasm) {
|
||||
skb = reasm;
|
||||
dataoff = iph.thoff_reasm + sizeof(struct udphdr);
|
||||
} else
|
||||
dataoff = iph.len + sizeof(struct udphdr);
|
||||
|
||||
if (dataoff >= skb->len)
|
||||
return -EINVAL;
|
||||
/* todo: Check if this will mess-up the reasm skb !!! /HS */
|
||||
retc = skb_linearize(skb);
|
||||
if (retc < 0)
|
||||
return retc;
|
||||
|
|
Loading…
Reference in New Issue