Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6

This commit is contained in:
David S. Miller 2011-01-20 00:06:15 -08:00
commit a07aa004c8
135 changed files with 4523 additions and 1758 deletions

View File

@ -103,6 +103,8 @@
#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */
#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */
#define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */
#define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */
#define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */

View File

@ -89,6 +89,14 @@
#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */
#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */
#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
IP_VS_CONN_F_NOOUTPUT | \
IP_VS_CONN_F_INACTIVE | \
IP_VS_CONN_F_SEQ_MASK | \
IP_VS_CONN_F_NO_CPORT | \
IP_VS_CONN_F_TEMPLATE \
)
/* Flags that are not sent to backup server start from bit 16 */
#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */

View File

@ -24,16 +24,20 @@
#define NF_MAX_VERDICT NF_STOP
/* we overload the higher bits for encoding auxiliary data such as the queue
* number. Not nice, but better than additional function arguments. */
#define NF_VERDICT_MASK 0x0000ffff
#define NF_VERDICT_BITS 16
* number or errno values. Not nice, but better than additional function
* arguments. */
#define NF_VERDICT_MASK 0x000000ff
/* extra verdict flags have mask 0x0000ff00 */
#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
/* queue number (NF_QUEUE) or errno (NF_DROP) */
#define NF_VERDICT_QMASK 0xffff0000
#define NF_VERDICT_QBITS 16
#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
/* only for userspace compatibility */
#ifndef __KERNEL__
@ -41,6 +45,9 @@
<= 0x2000 is used for protocol-flags. */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
#define NF_VERDICT_BITS 16
#endif
enum nf_inet_hooks {
@ -72,6 +79,10 @@ union nf_inet_addr {
#ifdef __KERNEL__
#ifdef CONFIG_NETFILTER
static inline int NF_DROP_GETERR(int verdict)
{
return -(verdict >> NF_VERDICT_QBITS);
}
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
@ -267,7 +278,7 @@ struct nf_afinfo {
int route_key_size;
};
extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];
extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
{
return rcu_dereference(nf_afinfo[family]);
@ -357,9 +368,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
extern void (*nf_ct_destroy)(struct nf_conntrack *);
extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
#else
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif

View File

@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h
header-y += nfnetlink_log.h
header-y += nfnetlink_queue.h
header-y += x_tables.h
header-y += xt_AUDIT.h
header-y += xt_CHECKSUM.h
header-y += xt_CLASSIFY.h
header-y += xt_CONNMARK.h
@ -55,6 +56,7 @@ header-y += xt_rateest.h
header-y += xt_realm.h
header-y += xt_recent.h
header-y += xt_sctp.h
header-y += xt_socket.h
header-y += xt_state.h
header-y += xt_statistic.h
header-y += xt_string.h

View File

@ -0,0 +1,9 @@
#ifndef _NF_CONNTRACK_SNMP_H
#define _NF_CONNTRACK_SNMP_H
extern int (*nf_nat_snmp_hook)(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo);
#endif /* _NF_CONNTRACK_SNMP_H */

View File

@ -42,6 +42,7 @@ enum ctattr_type {
CTA_SECMARK, /* obsolete */
CTA_ZONE,
CTA_SECCTX,
CTA_TIMESTAMP,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
@ -127,6 +128,14 @@ enum ctattr_counters {
};
#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
enum ctattr_tstamp {
CTA_TIMESTAMP_UNSPEC,
CTA_TIMESTAMP_START,
CTA_TIMESTAMP_STOP,
__CTA_TIMESTAMP_MAX
};
#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1)
enum ctattr_nat {
CTA_NAT_UNSPEC,
CTA_NAT_MINIP,

View File

@ -611,8 +611,9 @@ struct _compat_xt_align {
extern void xt_compat_lock(u_int8_t af);
extern void xt_compat_unlock(u_int8_t af);
extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);
extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
extern void xt_compat_flush_offsets(u_int8_t af);
extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
extern int xt_compat_match_offset(const struct xt_match *match);

View File

@ -0,0 +1,30 @@
/*
* Header file for iptables xt_AUDIT target
*
* (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
* (C) 2010-2011 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef _XT_AUDIT_TARGET_H
#define _XT_AUDIT_TARGET_H
#include <linux/types.h>
enum {
XT_AUDIT_TYPE_ACCEPT = 0,
XT_AUDIT_TYPE_DROP,
XT_AUDIT_TYPE_REJECT,
__XT_AUDIT_TYPE_MAX,
};
#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1)
struct xt_audit_info {
__u8 type; /* XT_AUDIT_TYPE_* */
};
#endif /* _XT_AUDIT_TARGET_H */

View File

@ -4,11 +4,11 @@
#define XT_CT_NOTRACK 0x1
struct xt_ct_target_info {
u_int16_t flags;
u_int16_t zone;
u_int32_t ct_events;
u_int32_t exp_events;
char helper[16];
__u16 flags;
__u16 zone;
__u32 ct_events;
__u32 exp_events;
char helper[16];
/* Used internally by the kernel */
struct nf_conn *ct __attribute__((aligned(8)));

View File

@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 {
__u16 queues_total;
};
struct xt_NFQ_info_v2 {
__u16 queuenum;
__u16 queues_total;
__u16 bypass;
};
#endif /* _XT_NFQ_TARGET_H */

View File

@ -7,7 +7,7 @@
(((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0)
struct xt_tcpoptstrip_target_info {
u_int32_t strip_bmap[8];
__u32 strip_bmap[8];
};
#endif /* _XT_TCPOPTSTRIP_H */

View File

@ -5,15 +5,15 @@
* redirection. We can get rid of that whenever we get support for
* mutliple targets in the same rule. */
struct xt_tproxy_target_info {
u_int32_t mark_mask;
u_int32_t mark_value;
__u32 mark_mask;
__u32 mark_value;
__be32 laddr;
__be16 lport;
};
struct xt_tproxy_target_info_v1 {
u_int32_t mark_mask;
u_int32_t mark_value;
__u32 mark_mask;
__u32 mark_value;
union nf_inet_addr laddr;
__be16 lport;
};

View File

@ -6,10 +6,10 @@ enum xt_cluster_flags {
};
struct xt_cluster_match_info {
u_int32_t total_nodes;
u_int32_t node_mask;
u_int32_t hash_seed;
u_int32_t flags;
__u32 total_nodes;
__u32 node_mask;
__u32 hash_seed;
__u32 flags;
};
#define XT_CLUSTER_NODES_MAX 32

View File

@ -4,7 +4,7 @@
#define XT_MAX_COMMENT_LEN 256
struct xt_comment_info {
unsigned char comment[XT_MAX_COMMENT_LEN];
char comment[XT_MAX_COMMENT_LEN];
};
#endif /* XT_COMMENT_H */

View File

@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 {
__u16 state_mask, status_mask;
};
struct xt_conntrack_mtinfo3 {
union nf_inet_addr origsrc_addr, origsrc_mask;
union nf_inet_addr origdst_addr, origdst_mask;
union nf_inet_addr replsrc_addr, replsrc_mask;
union nf_inet_addr repldst_addr, repldst_mask;
__u32 expires_min, expires_max;
__u16 l4proto;
__u16 origsrc_port, origdst_port;
__u16 replsrc_port, repldst_port;
__u16 match_flags, invert_flags;
__u16 state_mask, status_mask;
__u16 origsrc_port_high, origdst_port_high;
__u16 replsrc_port_high, repldst_port_high;
};
#endif /*_XT_CONNTRACK_H*/

View File

@ -9,9 +9,9 @@ enum xt_quota_flags {
struct xt_quota_priv;
struct xt_quota_info {
u_int32_t flags;
u_int32_t pad;
aligned_u64 quota;
__u32 flags;
__u32 pad;
aligned_u64 quota;
/* Used internally by the kernel */
struct xt_quota_priv *master;

View File

@ -2,13 +2,13 @@
#define _XT_TIME_H 1
struct xt_time_info {
u_int32_t date_start;
u_int32_t date_stop;
u_int32_t daytime_start;
u_int32_t daytime_stop;
u_int32_t monthdays_match;
u_int8_t weekdays_match;
u_int8_t flags;
__u32 date_start;
__u32 date_stop;
__u32 daytime_start;
__u32 daytime_stop;
__u32 monthdays_match;
__u8 weekdays_match;
__u8 flags;
};
enum {

View File

@ -9,13 +9,13 @@ enum xt_u32_ops {
};
struct xt_u32_location_element {
u_int32_t number;
u_int8_t nextop;
__u32 number;
__u8 nextop;
};
struct xt_u32_value_element {
u_int32_t min;
u_int32_t max;
__u32 min;
__u32 max;
};
/*
@ -27,14 +27,14 @@ struct xt_u32_value_element {
struct xt_u32_test {
struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
u_int8_t nnums;
u_int8_t nvalues;
__u8 nnums;
__u8 nvalues;
};
struct xt_u32 {
struct xt_u32_test tests[XT_U32_MAXSIZE+1];
u_int8_t ntests;
u_int8_t invert;
__u8 ntests;
__u8 invert;
};
#endif /* _XT_U32_H */

View File

@ -24,24 +24,24 @@
/* ui has one byte ctrl, ni has two */
struct hdr_ui {
uint8_t dsap;
uint8_t ssap;
uint8_t ctrl;
uint8_t orig[3];
__u8 dsap;
__u8 ssap;
__u8 ctrl;
__u8 orig[3];
__be16 type;
};
struct hdr_ni {
uint8_t dsap;
uint8_t ssap;
__u8 dsap;
__u8 ssap;
__be16 ctrl;
uint8_t orig[3];
__u8 orig[3];
__be16 type;
};
struct ebt_802_3_hdr {
uint8_t daddr[6];
uint8_t saddr[6];
__u8 daddr[6];
__u8 saddr[6];
__be16 len;
union {
struct hdr_ui ui;
@ -59,10 +59,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
#endif
struct ebt_802_3_info {
uint8_t sap;
__u8 sap;
__be16 type;
uint8_t bitmask;
uint8_t invflags;
__u8 bitmask;
__u8 invflags;
};
#endif

View File

@ -30,7 +30,7 @@
*/
struct ebt_mac_wormhash_tuple {
uint32_t cmp[2];
__u32 cmp[2];
__be32 ip;
};

View File

@ -27,8 +27,8 @@ struct ebt_arp_info
unsigned char smmsk[ETH_ALEN];
unsigned char dmaddr[ETH_ALEN];
unsigned char dmmsk[ETH_ALEN];
uint8_t bitmask;
uint8_t invflags;
__u8 bitmask;
__u8 invflags;
};
#endif

View File

@ -31,12 +31,12 @@ struct ebt_ip_info {
__be32 daddr;
__be32 smsk;
__be32 dmsk;
uint8_t tos;
uint8_t protocol;
uint8_t bitmask;
uint8_t invflags;
uint16_t sport[2];
uint16_t dport[2];
__u8 tos;
__u8 protocol;
__u8 bitmask;
__u8 invflags;
__u16 sport[2];
__u16 dport[2];
};
#endif

View File

@ -18,8 +18,11 @@
#define EBT_IP6_PROTO 0x08
#define EBT_IP6_SPORT 0x10
#define EBT_IP6_DPORT 0x20
#define EBT_IP6_ICMP6 0x40
#define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\
EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT)
EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \
EBT_IP6_ICMP6)
#define EBT_IP6_MATCH "ip6"
/* the same values are used for the invflags */
@ -28,12 +31,18 @@ struct ebt_ip6_info {
struct in6_addr daddr;
struct in6_addr smsk;
struct in6_addr dmsk;
uint8_t tclass;
uint8_t protocol;
uint8_t bitmask;
uint8_t invflags;
uint16_t sport[2];
uint16_t dport[2];
__u8 tclass;
__u8 protocol;
__u8 bitmask;
__u8 invflags;
union {
__u16 sport[2];
__u8 icmpv6_type[2];
};
union {
__u16 dport[2];
__u8 icmpv6_code[2];
};
};
#endif

View File

@ -10,13 +10,13 @@
seconds, or one every 59 hours. */
struct ebt_limit_info {
u_int32_t avg; /* Average secs between packets * scale */
u_int32_t burst; /* Period multiplier for upper limit. */
__u32 avg; /* Average secs between packets * scale */
__u32 burst; /* Period multiplier for upper limit. */
/* Used internally by the kernel */
unsigned long prev;
u_int32_t credit;
u_int32_t credit_cap, cost;
__u32 credit;
__u32 credit_cap, cost;
};
#endif

View File

@ -10,9 +10,9 @@
#define EBT_LOG_WATCHER "log"
struct ebt_log_info {
uint8_t loglevel;
uint8_t prefix[EBT_LOG_PREFIX_SIZE];
uint32_t bitmask;
__u8 loglevel;
__u8 prefix[EBT_LOG_PREFIX_SIZE];
__u32 bitmask;
};
#endif

View File

@ -6,8 +6,8 @@
#define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
struct ebt_mark_m_info {
unsigned long mark, mask;
uint8_t invert;
uint8_t bitmask;
__u8 invert;
__u8 bitmask;
};
#define EBT_MARK_MATCH "mark_m"

View File

@ -10,11 +10,11 @@
#define EBT_NFLOG_DEFAULT_THRESHOLD 1
struct ebt_nflog_info {
u_int32_t len;
u_int16_t group;
u_int16_t threshold;
u_int16_t flags;
u_int16_t pad;
__u32 len;
__u16 group;
__u16 threshold;
__u16 flags;
__u16 pad;
char prefix[EBT_NFLOG_PREFIX_SIZE];
};

View File

@ -2,8 +2,8 @@
#define __LINUX_BRIDGE_EBT_PKTTYPE_H
struct ebt_pkttype_info {
uint8_t pkt_type;
uint8_t invert;
__u8 pkt_type;
__u8 invert;
};
#define EBT_PKTTYPE_MATCH "pkttype"

View File

@ -21,24 +21,24 @@
#define EBT_STP_MATCH "stp"
struct ebt_stp_config_info {
uint8_t flags;
uint16_t root_priol, root_priou;
__u8 flags;
__u16 root_priol, root_priou;
char root_addr[6], root_addrmsk[6];
uint32_t root_costl, root_costu;
uint16_t sender_priol, sender_priou;
__u32 root_costl, root_costu;
__u16 sender_priol, sender_priou;
char sender_addr[6], sender_addrmsk[6];
uint16_t portl, portu;
uint16_t msg_agel, msg_ageu;
uint16_t max_agel, max_ageu;
uint16_t hello_timel, hello_timeu;
uint16_t forward_delayl, forward_delayu;
__u16 portl, portu;
__u16 msg_agel, msg_ageu;
__u16 max_agel, max_ageu;
__u16 hello_timel, hello_timeu;
__u16 forward_delayl, forward_delayu;
};
struct ebt_stp_info {
uint8_t type;
__u8 type;
struct ebt_stp_config_info config;
uint16_t bitmask;
uint16_t invflags;
__u16 bitmask;
__u16 invflags;
};
#endif

View File

@ -10,7 +10,7 @@
#define EBT_ULOG_VERSION 1
struct ebt_ulog_info {
uint32_t nlgroup;
__u32 nlgroup;
unsigned int cprange;
unsigned int qthreshold;
char prefix[EBT_ULOG_PREFIX_LEN];

View File

@ -8,12 +8,12 @@
#define EBT_VLAN_MATCH "vlan"
struct ebt_vlan_info {
uint16_t id; /* VLAN ID {1-4095} */
uint8_t prio; /* VLAN User Priority {0-7} */
__u16 id; /* VLAN ID {1-4095} */
__u8 prio; /* VLAN User Priority {0-7} */
__be16 encap; /* VLAN Encapsulated frame code {0-65535} */
uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg,
__u8 bitmask; /* Args bitmask bit 1=1 - ID arg,
bit 2=1 User-Priority arg, bit 3=1 encap*/
uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
__u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
bit 2=1 - inversed Pirority arg */
};

View File

@ -17,15 +17,15 @@ struct clusterip_config;
struct ipt_clusterip_tgt_info {
u_int32_t flags;
__u32 flags;
/* only relevant for new ones */
u_int8_t clustermac[6];
u_int16_t num_total_nodes;
u_int16_t num_local_nodes;
u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
u_int32_t hash_mode;
u_int32_t hash_initval;
__u8 clustermac[6];
__u16 num_total_nodes;
__u16 num_local_nodes;
__u16 local_nodes[CLUSTERIP_MAX_NODES];
__u32 hash_mode;
__u32 hash_initval;
/* Used internally by the kernel */
struct clusterip_config *config;

View File

@ -19,11 +19,11 @@
#define IPT_ECN_OP_MASK 0xce
struct ipt_ECN_info {
u_int8_t operation; /* bitset of operations */
u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
__u8 operation; /* bitset of operations */
__u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
union {
struct {
u_int8_t ece:1, cwr:1; /* TCP ECT bits */
__u8 ece:1, cwr:1; /* TCP ECT bits */
} tcp;
} proto;
};

View File

@ -7,9 +7,9 @@
struct ipt_same_info {
unsigned char info;
u_int32_t rangesize;
u_int32_t ipnum;
u_int32_t *iparray;
__u32 rangesize;
__u32 ipnum;
__u32 *iparray;
/* hangs off end. */
struct nf_nat_range range[IPT_SAME_MAX_RANGE];

View File

@ -13,8 +13,8 @@ enum {
#define IPT_TTL_MAXMODE IPT_TTL_DEC
struct ipt_TTL_info {
u_int8_t mode;
u_int8_t ttl;
__u8 mode;
__u8 ttl;
};

View File

@ -9,17 +9,17 @@ enum {
};
struct ipt_addrtype_info_v1 {
u_int16_t source; /* source-type mask */
u_int16_t dest; /* dest-type mask */
u_int32_t flags;
__u16 source; /* source-type mask */
__u16 dest; /* dest-type mask */
__u32 flags;
};
/* revision 0 */
struct ipt_addrtype_info {
u_int16_t source; /* source-type mask */
u_int16_t dest; /* dest-type mask */
u_int32_t invert_source;
u_int32_t invert_dest;
__u16 source; /* source-type mask */
__u16 dest; /* dest-type mask */
__u32 invert_source;
__u32 invert_dest;
};
#endif

View File

@ -2,8 +2,8 @@
#define _IPT_AH_H
struct ipt_ah {
u_int32_t spis[2]; /* Security Parameter Index */
u_int8_t invflags; /* Inverse flags */
__u32 spis[2]; /* Security Parameter Index */
__u8 invflags; /* Inverse flags */
};

View File

@ -20,12 +20,12 @@
/* match info */
struct ipt_ecn_info {
u_int8_t operation;
u_int8_t invert;
u_int8_t ip_ect;
__u8 operation;
__u8 invert;
__u8 ip_ect;
union {
struct {
u_int8_t ect;
__u8 ect;
} tcp;
} proto;
};

View File

@ -13,8 +13,8 @@ enum {
struct ipt_ttl_info {
u_int8_t mode;
u_int8_t ttl;
__u8 mode;
__u8 ttl;
};

View File

@ -14,8 +14,8 @@ enum {
#define IP6T_HL_MAXMODE IP6T_HL_DEC
struct ip6t_HL_info {
u_int8_t mode;
u_int8_t hop_limit;
__u8 mode;
__u8 hop_limit;
};

View File

@ -12,7 +12,7 @@ enum ip6t_reject_with {
};
struct ip6t_reject_info {
u_int32_t with; /* reject type */
__u32 with; /* reject type */
};
#endif /*_IP6T_REJECT_H*/

View File

@ -2,10 +2,10 @@
#define _IP6T_AH_H
struct ip6t_ah {
u_int32_t spis[2]; /* Security Parameter Index */
u_int32_t hdrlen; /* Header Length */
u_int8_t hdrres; /* Test of the Reserved Filed */
u_int8_t invflags; /* Inverse flags */
__u32 spis[2]; /* Security Parameter Index */
__u32 hdrlen; /* Header Length */
__u8 hdrres; /* Test of the Reserved Filed */
__u8 invflags; /* Inverse flags */
};
#define IP6T_AH_SPI 0x01

View File

@ -2,10 +2,10 @@
#define _IP6T_FRAG_H
struct ip6t_frag {
u_int32_t ids[2]; /* Security Parameter Index */
u_int32_t hdrlen; /* Header Length */
u_int8_t flags; /* */
u_int8_t invflags; /* Inverse flags */
__u32 ids[2]; /* Security Parameter Index */
__u32 hdrlen; /* Header Length */
__u8 flags; /* */
__u8 invflags; /* Inverse flags */
};
#define IP6T_FRAG_IDS 0x01

View File

@ -14,8 +14,8 @@ enum {
struct ip6t_hl_info {
u_int8_t mode;
u_int8_t hop_limit;
__u8 mode;
__u8 hop_limit;
};

View File

@ -9,9 +9,9 @@ on whether they contain certain headers */
#define __IPV6HEADER_H
struct ip6t_ipv6header_info {
u_int8_t matchflags;
u_int8_t invflags;
u_int8_t modeflag;
__u8 matchflags;
__u8 invflags;
__u8 modeflag;
};
#define MASK_HOPOPTS 128

View File

@ -3,8 +3,8 @@
/* MH matching stuff */
struct ip6t_mh {
u_int8_t types[2]; /* MH type range */
u_int8_t invflags; /* Inverse flags */
__u8 types[2]; /* MH type range */
__u8 invflags; /* Inverse flags */
};
/* Values for "invflags" field in struct ip6t_mh. */

View File

@ -4,11 +4,11 @@
#define IP6T_OPTS_OPTSNR 16
struct ip6t_opts {
u_int32_t hdrlen; /* Header Length */
u_int8_t flags; /* */
u_int8_t invflags; /* Inverse flags */
u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */
u_int8_t optsnr; /* Nr of OPts */
__u32 hdrlen; /* Header Length */
__u8 flags; /* */
__u8 invflags; /* Inverse flags */
__u16 opts[IP6T_OPTS_OPTSNR]; /* opts */
__u8 optsnr; /* Nr of OPts */
};
#define IP6T_OPTS_LEN 0x01

View File

@ -6,13 +6,13 @@
#define IP6T_RT_HOPS 16
struct ip6t_rt {
u_int32_t rt_type; /* Routing Type */
u_int32_t segsleft[2]; /* Segments Left */
u_int32_t hdrlen; /* Header Length */
u_int8_t flags; /* */
u_int8_t invflags; /* Inverse flags */
__u32 rt_type; /* Routing Type */
__u32 segsleft[2]; /* Segments Left */
__u32 hdrlen; /* Header Length */
__u8 flags; /* */
__u8 invflags; /* Inverse flags */
struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */
u_int8_t addrnr; /* Nr of Addresses */
__u8 addrnr; /* Nr of Addresses */
};
#define IP6T_RT_TYP 0x01

View File

@ -72,7 +72,7 @@ struct dst_entry {
u32 _metrics[RTAX_MAX];
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 tclassid;
#else
__u32 __pad2;

View File

@ -55,7 +55,7 @@ struct fib_nh {
int nh_weight;
int nh_power;
#endif
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
__u32 nh_tclassid;
#endif
int nh_oif;
@ -201,7 +201,7 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp,
extern int __net_init fib4_rules_init(struct net *net);
extern void __net_exit fib4_rules_exit(struct net *net);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
extern u32 fib_rules_tclass(struct fib_result *res);
#endif
@ -235,7 +235,7 @@ extern struct fib_table *fib_hash_table(u32 id);
static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
{
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
u32 rtag;
#endif

View File

@ -28,6 +28,80 @@
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#endif
#include <net/net_namespace.h> /* Netw namespace */
/*
* Generic access of ipvs struct
*/
static inline struct netns_ipvs *net_ipvs(struct net* net)
{
return net->ipvs;
}
/*
* Get net ptr from skb in traffic cases
* use skb_sknet when call is from userland (ioctl or netlink)
*/
static inline struct net *skb_net(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
#ifdef CONFIG_IP_VS_DEBUG
/*
* This is used for debug only.
* Start with the most likely hit
* End with BUG
*/
if (likely(skb->dev && skb->dev->nd_net))
return dev_net(skb->dev);
if (skb_dst(skb)->dev)
return dev_net(skb_dst(skb)->dev);
WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
__func__, __LINE__);
if (likely(skb->sk && skb->sk->sk_net))
return sock_net(skb->sk);
pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
__func__, __LINE__);
BUG();
#else
return dev_net(skb->dev ? : skb_dst(skb)->dev);
#endif
#else
return &init_net;
#endif
}
static inline struct net *skb_sknet(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
#ifdef CONFIG_IP_VS_DEBUG
/* Start with the most likely hit */
if (likely(skb->sk && skb->sk->sk_net))
return sock_net(skb->sk);
WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
__func__, __LINE__);
if (likely(skb->dev && skb->dev->nd_net))
return dev_net(skb->dev);
pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
__func__, __LINE__);
BUG();
#else
return sock_net(skb->sk);
#endif
#else
return &init_net;
#endif
}
/*
* This one needed for single_open_net since net is stored directly in
* private not as a struct i.e. seq_file_net cant be used.
*/
static inline struct net *seq_file_single_net(struct seq_file *seq)
{
#ifdef CONFIG_NET_NS
return (struct net *)seq->private;
#else
return &init_net;
#endif
}
/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
@ -258,6 +332,23 @@ struct ip_vs_seq {
before last resized pkt */
};
/*
* counters per cpu
*/
struct ip_vs_counters {
__u32 conns; /* connections scheduled */
__u32 inpkts; /* incoming packets */
__u32 outpkts; /* outgoing packets */
__u64 inbytes; /* incoming bytes */
__u64 outbytes; /* outgoing bytes */
};
/*
* Stats per cpu
*/
struct ip_vs_cpu_stats {
struct ip_vs_counters ustats;
struct u64_stats_sync syncp;
};
/*
* IPVS statistics objects
@ -279,17 +370,34 @@ struct ip_vs_estimator {
};
struct ip_vs_stats {
struct ip_vs_stats_user ustats; /* statistics */
struct ip_vs_stats_user ustats; /* statistics */
struct ip_vs_estimator est; /* estimator */
spinlock_t lock; /* spin lock */
struct ip_vs_cpu_stats *cpustats; /* per cpu counters */
spinlock_t lock; /* spin lock */
};
/*
* Helper Macros for per cpu
* ipvs->tot_stats->ustats.count
*/
#define IPVS_STAT_INC(ipvs, count) \
__this_cpu_inc((ipvs)->ustats->count)
#define IPVS_STAT_ADD(ipvs, count, value) \
do {\
write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
raw_smp_processor_id())); \
__this_cpu_add((ipvs)->ustats->count, value); \
write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
raw_smp_processor_id())); \
} while (0)
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
struct ip_vs_app;
struct sk_buff;
struct ip_vs_proto_data;
struct ip_vs_protocol {
struct ip_vs_protocol *next;
@ -297,21 +405,22 @@ struct ip_vs_protocol {
u16 protocol;
u16 num_states;
int dont_defrag;
atomic_t appcnt; /* counter of proto app incs */
int *timeout_table; /* protocol timeout table */
void (*init)(struct ip_vs_protocol *pp);
void (*exit)(struct ip_vs_protocol *pp);
void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
int (*conn_schedule)(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn *
(*conn_in_get)(int af,
const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@ -319,7 +428,6 @@ struct ip_vs_protocol {
struct ip_vs_conn *
(*conn_out_get)(int af,
const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@ -337,11 +445,11 @@ struct ip_vs_protocol {
int (*state_transition)(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
struct ip_vs_protocol *pp);
struct ip_vs_proto_data *pd);
int (*register_app)(struct ip_vs_app *inc);
int (*register_app)(struct net *net, struct ip_vs_app *inc);
void (*unregister_app)(struct ip_vs_app *inc);
void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
int (*app_conn_bind)(struct ip_vs_conn *cp);
@ -350,14 +458,26 @@ struct ip_vs_protocol {
int offset,
const char *msg);
void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
/*
* protocol data per netns
*/
struct ip_vs_proto_data {
struct ip_vs_proto_data *next;
struct ip_vs_protocol *pp;
int *timeout_table; /* protocol timeout table */
atomic_t appcnt; /* counter of proto app incs. */
struct tcp_states_t *tcp_state_table;
};
extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto);
extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
unsigned short proto);
struct ip_vs_conn_param {
struct net *net;
const union nf_inet_addr *caddr;
const union nf_inet_addr *vaddr;
__be16 cport;
@ -375,16 +495,19 @@ struct ip_vs_conn_param {
*/
struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */
#ifdef CONFIG_NET_NS
struct net *net; /* Name space */
#endif
/* Protocol, addresses and port numbers */
u16 af; /* address family */
union nf_inet_addr caddr; /* client address */
union nf_inet_addr vaddr; /* virtual address */
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__be16 cport;
__be16 vport;
__be16 dport;
u16 af; /* address family */
__be16 cport;
__be16 vport;
__be16 dport;
__u32 fwmark; /* Fire wall mark from skb */
union nf_inet_addr caddr; /* client address */
union nf_inet_addr vaddr; /* virtual address */
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
/* counter and timer */
@ -422,10 +545,38 @@ struct ip_vs_conn {
struct ip_vs_seq in_seq; /* incoming seq. struct */
struct ip_vs_seq out_seq; /* outgoing seq. struct */
const struct ip_vs_pe *pe;
char *pe_data;
__u8 pe_data_len;
};
/*
* To save some memory in conn table when name space is disabled.
*/
static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
{
#ifdef CONFIG_NET_NS
return cp->net;
#else
return &init_net;
#endif
}
static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
{
#ifdef CONFIG_NET_NS
cp->net = net;
#endif
}
static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
struct net *net)
{
#ifdef CONFIG_NET_NS
return cp->net == net;
#else
return 1;
#endif
}
/*
* Extended internal versions of struct ip_vs_service_user and
@ -485,6 +636,7 @@ struct ip_vs_service {
unsigned flags; /* service status flags */
unsigned timeout; /* persistent timeout in ticks */
__be32 netmask; /* grouping granularity */
struct net *net;
struct list_head destinations; /* real server d-linked list */
__u32 num_dests; /* number of servers */
@ -510,8 +662,8 @@ struct ip_vs_dest {
struct list_head d_list; /* for table with all the dests */
u16 af; /* address family */
union nf_inet_addr addr; /* IP address of the server */
__be16 port; /* port number of the server */
union nf_inet_addr addr; /* IP address of the server */
volatile unsigned flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
@ -538,8 +690,8 @@ struct ip_vs_dest {
/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
__u16 protocol; /* which protocol (TCP/UDP) */
union nf_inet_addr vaddr; /* virtual IP address */
__be16 vport; /* virtual port number */
union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */
};
@ -674,13 +826,14 @@ enum {
IP_VS_DIR_LAST,
};
static inline void ip_vs_conn_fill_param(int af, int protocol,
static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
const union nf_inet_addr *caddr,
__be16 cport,
const union nf_inet_addr *vaddr,
__be16 vport,
struct ip_vs_conn_param *p)
{
p->net = net;
p->af = af;
p->protocol = protocol;
p->caddr = caddr;
@ -695,7 +848,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@ -703,7 +855,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@ -719,14 +870,14 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr,
__be16 dport, unsigned flags,
struct ip_vs_dest *dest);
struct ip_vs_dest *dest, __u32 fwmark);
extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
extern const char * ip_vs_state_name(__u16 proto, int state);
extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_check_template(struct ip_vs_conn *ct);
extern void ip_vs_random_dropentry(void);
extern void ip_vs_random_dropentry(struct net *net);
extern int ip_vs_conn_init(void);
extern void ip_vs_conn_cleanup(void);
@ -796,12 +947,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
* (from ip_vs_app.c)
*/
#define IP_VS_APP_MAX_PORTS 8
extern int register_ip_vs_app(struct ip_vs_app *app);
extern void unregister_ip_vs_app(struct ip_vs_app *app);
extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
extern int
register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
__u16 proto, __u16 port);
extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
@ -814,15 +965,27 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
extern struct ip_vs_pe *ip_vs_pe_get(const char *name);
extern void ip_vs_pe_put(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
{
if (pe && pe->module)
__module_get(pe->module);
}
static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
{
if (pe && pe->module)
module_put(pe->module);
}
/*
* IPVS protocol functions (from ip_vs_proto.c)
*/
extern int ip_vs_protocol_init(void);
extern void ip_vs_protocol_cleanup(void);
extern void ip_vs_protocol_timeout_change(int flags);
extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
extern int *ip_vs_create_timeout_table(int *table, int size);
extern int
ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@ -852,26 +1015,21 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp, int *ignored);
struct ip_vs_proto_data *pd, int *ignored);
extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp);
struct ip_vs_proto_data *pd);
/*
* IPVS control data and functions (from ip_vs_ctl.c)
*/
extern int sysctl_ip_vs_cache_bypass;
extern int sysctl_ip_vs_expire_nodest_conn;
extern int sysctl_ip_vs_expire_quiescent_template;
extern int sysctl_ip_vs_sync_threshold[2];
extern int sysctl_ip_vs_nat_icmp_send;
extern int sysctl_ip_vs_conntrack;
extern int sysctl_ip_vs_snat_reroute;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern int sysctl_ip_vs_sync_ver;
extern void ip_vs_sync_switch_mode(struct net *net, int mode);
extern struct ip_vs_service *
ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
@ -880,7 +1038,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
}
extern struct ip_vs_dest *
ip_vs_lookup_real_service(int af, __u16 protocol,
ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void);
@ -888,8 +1046,9 @@ extern void ip_vs_use_count_dec(void);
extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
__u16 protocol, __u32 fwmark);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
@ -897,14 +1056,12 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
* IPVS sync daemon data and function prototypes
* (from ip_vs_sync.c)
*/
extern volatile int ip_vs_sync_state;
extern volatile int ip_vs_master_syncid;
extern volatile int ip_vs_backup_syncid;
extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
extern int stop_sync_thread(int state);
extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
__u8 syncid);
extern int stop_sync_thread(struct net *net, int state);
extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_sync_init(void);
extern void ip_vs_sync_cleanup(void);
/*
@ -912,8 +1069,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
*/
extern int ip_vs_estimator_init(void);
extern void ip_vs_estimator_cleanup(void);
extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
/*
@ -955,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6
extern int ip_vs_drop_rate;
extern int ip_vs_drop_counter;
static __inline__ int ip_vs_todrop(void)
static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
if (!ip_vs_drop_rate) return 0;
if (--ip_vs_drop_counter > 0) return 0;
ip_vs_drop_counter = ip_vs_drop_rate;
if (!ipvs->drop_rate)
return 0;
if (--ipvs->drop_counter > 0)
return 0;
ipvs->drop_counter = ipvs->drop_rate;
return 1;
}
@ -1047,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
* Netfilter connection tracking
* (from ip_vs_nfct.c)
*/
static inline int ip_vs_conntrack_enabled(void)
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
return sysctl_ip_vs_conntrack;
return ipvs->sysctl_conntrack;
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@ -1062,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
#else
static inline int ip_vs_conntrack_enabled(void)
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
return 0;
}

View File

@ -20,6 +20,7 @@
#include <net/netns/conntrack.h>
#endif
#include <net/netns/xfrm.h>
#include <net/netns/ip_vs.h>
struct proc_dir_entry;
struct net_device;
@ -94,6 +95,7 @@ struct net {
#ifdef CONFIG_XFRM
struct netns_xfrm xfrm;
#endif
struct netns_ipvs *ipvs;
};

View File

@ -50,11 +50,24 @@ union nf_conntrack_expect_proto {
/* per conntrack: application helper private data */
union nf_conntrack_help {
/* insert conntrack helper private data (master) here */
#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE)
struct nf_ct_ftp_master ct_ftp_info;
#endif
#if defined(CONFIG_NF_CONNTRACK_PPTP) || \
defined(CONFIG_NF_CONNTRACK_PPTP_MODULE)
struct nf_ct_pptp_master ct_pptp_info;
#endif
#if defined(CONFIG_NF_CONNTRACK_H323) || \
defined(CONFIG_NF_CONNTRACK_H323_MODULE)
struct nf_ct_h323_master ct_h323_info;
#endif
#if defined(CONFIG_NF_CONNTRACK_SANE) || \
defined(CONFIG_NF_CONNTRACK_SANE_MODULE)
struct nf_ct_sane_master ct_sane_info;
#endif
#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE)
struct nf_ct_sip_master ct_sip_info;
#endif
};
#include <linux/types.h>
@ -116,14 +129,14 @@ struct nf_conn {
u_int32_t secmark;
#endif
/* Storage reserved for other modules: */
union nf_conntrack_proto proto;
/* Extensions */
struct nf_ct_ext *ext;
#ifdef CONFIG_NET_NS
struct net *ct_net;
#endif
/* Storage reserved for other modules, must be the last member */
union nf_conntrack_proto proto;
};
static inline struct nf_conn *
@ -189,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto);
* Allocate a hashtable of hlist_head (if nulls == 0),
* or hlist_nulls_head (if nulls == 1)
*/
extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
extern void nf_ct_free_hashtable(void *hash, unsigned int size);
extern struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, u16 zone,

View File

@ -23,12 +23,17 @@ struct nf_conntrack_ecache {
static inline struct nf_conntrack_ecache *
nf_ct_ecache_find(const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
#else
return NULL;
#endif
}
static inline struct nf_conntrack_ecache *
nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *e;
@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
e->expmask = expmask;
}
return e;
#else
return NULL;
#endif
};
#ifdef CONFIG_NF_CONNTRACK_EVENTS
@ -59,7 +67,7 @@ struct nf_ct_event_notifier {
int (*fcn)(unsigned int events, struct nf_ct_event *item);
};
extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
@ -159,7 +167,7 @@ struct nf_exp_event_notifier {
int (*fcn)(unsigned int events, struct nf_exp_event *item);
};
extern struct nf_exp_event_notifier *nf_expect_event_cb;
extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);

View File

@ -7,10 +7,19 @@
enum nf_ct_ext_id {
NF_CT_EXT_HELPER,
#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
NF_CT_EXT_NAT,
#endif
NF_CT_EXT_ACCT,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
NF_CT_EXT_ECACHE,
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
NF_CT_EXT_ZONE,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
NF_CT_EXT_TSTAMP,
#endif
NF_CT_EXT_NUM,
};
@ -19,6 +28,7 @@ enum nf_ct_ext_id {
#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {

View File

@ -63,4 +63,10 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
extern int nf_conntrack_helper_init(void);
extern void nf_conntrack_helper_fini(void);
extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int timeout);
#endif /*_NF_CONNTRACK_HELPER_H*/

View File

@ -73,7 +73,7 @@ struct nf_conntrack_l3proto {
struct module *me;
};
extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
/* Protocol registration. */
extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);

View File

@ -0,0 +1,53 @@
#ifndef _NF_CONNTRACK_TSTAMP_H
#define _NF_CONNTRACK_TSTAMP_H
#include <net/net_namespace.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
struct nf_conn_tstamp {
u_int64_t start;
u_int64_t stop;
};
static inline
struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
#else
return NULL;
#endif
}
static inline
struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
{
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
struct net *net = nf_ct_net(ct);
if (!net->ct.sysctl_tstamp)
return NULL;
return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
#else
return NULL;
#endif
};
static inline bool nf_ct_tstamp_enabled(struct net *net)
{
return net->ct.sysctl_tstamp != 0;
}
static inline void nf_ct_set_tstamp(struct net *net, bool enable)
{
net->ct.sysctl_tstamp = enable;
}
extern int nf_conntrack_tstamp_init(struct net *net);
extern void nf_conntrack_tstamp_fini(struct net *net);
#endif /* _NF_CONNTRACK_TSTAMP_H */

View File

@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat {
/* per conntrack: nat application helper private data */
union nf_conntrack_nat_help {
/* insert nat helper private data here */
#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
struct nf_nat_pptp nat_pptp_info;
#endif
};
struct nf_conn;
@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
{
#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
#else
return NULL;
#endif
}
#else /* !__KERNEL__: iptables wants this to compile. */

View File

@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
enum nf_nat_manip_type manip)
{
if (manip == IP_NAT_MANIP_SRC)
return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
return ct->status & IPS_SRC_NAT_DONE;
else
return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
return ct->status & IPS_DST_NAT_DONE;
}
struct nlattr;

View File

@ -21,15 +21,15 @@ struct netns_ct {
int sysctl_events;
unsigned int sysctl_events_retry_timeout;
int sysctl_acct;
int sysctl_tstamp;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *tstamp_sysctl_header;
struct ctl_table_header *event_sysctl_header;
#endif
int hash_vmalloc;
int expect_vmalloc;
char *slabname;
};
#endif

143
include/net/netns/ip_vs.h Normal file
View File

@ -0,0 +1,143 @@
/*
* IP Virtual Server
* Data structure for network namspace
*
*/
#ifndef IP_VS_H_
#define IP_VS_H_
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/list_nulls.h>
#include <linux/ip_vs.h>
#include <asm/atomic.h>
#include <linux/in.h>
struct ip_vs_stats;
struct ip_vs_sync_buff;
struct ctl_table_header;
struct netns_ipvs {
int gen; /* Generation */
/*
* Hash table: for real service lookups
*/
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct list_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */
struct list_head app_list;
struct mutex app_mutex;
struct lock_class_key app_key; /* mutex debuging */
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
/* ip_vs_proto_tcp */
#ifdef CONFIG_IP_VS_PROTO_TCP
#define TCP_APP_TAB_BITS 4
#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
spinlock_t tcp_app_lock;
#endif
/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
#define UDP_APP_TAB_BITS 4
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
struct list_head udp_apps[UDP_APP_TAB_SIZE];
spinlock_t udp_app_lock;
#endif
/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
#define SCTP_APP_TAB_BITS 4
#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
/* Hash table for SCTP application incarnations */
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
spinlock_t sctp_app_lock;
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
/* ip_vs_ctl */
struct ip_vs_stats *tot_stats; /* Statistics & est. */
struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
seqcount_t *ustats_seq; /* u64 read retry */
int num_services; /* no of virtual services */
/* 1/rate drop and drop-entry variables */
struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
atomic_t dropentry;
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
spinlock_t securetcp_lock; /* state and timeout tables */
rwlock_t rs_lock; /* real services table */
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
struct lock_class_key ctl_key; /* ctl_mutex debuging */
/* Trash for destinations */
struct list_head dest_trash;
/* Service counters */
atomic_t ftpsvc_counter;
atomic_t nullsvc_counter;
/* sys-ctl struct */
struct ctl_table_header *sysctl_hdr;
struct ctl_table *sysctl_tbl;
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
int sysctl_drop_entry;
int sysctl_drop_packet;
int sysctl_secure_tcp;
#ifdef CONFIG_IP_VS_NFCT
int sysctl_conntrack;
#endif
int sysctl_snat_reroute;
int sysctl_sync_ver;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
int sysctl_expire_quiescent_template;
int sysctl_sync_threshold[2];
int sysctl_nat_icmp_send;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
struct ctl_table *lblc_ctl_table;
/* ip_vs_lblcr */
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
struct ctl_table *lblcr_ctl_table;
/* ip_vs_est */
struct list_head est_list; /* estimator list */
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
/* ip_vs_sync */
struct list_head sync_queue;
spinlock_t sync_lock;
struct ip_vs_sync_buff *sync_buff;
spinlock_t sync_buff_lock;
struct sockaddr_in sync_mcast_addr;
struct task_struct *master_thread;
struct task_struct *backup_thread;
int send_mesg_maxlen;
int recv_mesg_maxlen;
volatile int sync_state;
volatile int master_syncid;
volatile int backup_syncid;
/* multicast interface name */
char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */
struct net *net; /* Needed by timer routines */
};
#endif /* IP_VS_H_ */

View File

@ -43,7 +43,6 @@ struct netns_ipv4 {
struct xt_table *nat_table;
struct hlist_head *nat_bysource;
unsigned int nat_htable_size;
int nat_vmalloced;
#endif
int sysctl_icmp_echo_ignore_all;

View File

@ -74,6 +74,8 @@ static int audit_initialized;
int audit_enabled;
int audit_ever_enabled;
EXPORT_SYMBOL_GPL(audit_enabled);
/* Default state when kernel boots without any parameters. */
static int audit_default;

View File

@ -22,9 +22,15 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ip6.h>
struct tcpudphdr {
__be16 src;
__be16 dst;
union pkthdr {
struct {
__be16 src;
__be16 dst;
} tcpudphdr;
struct {
u8 type;
u8 code;
} icmphdr;
};
static bool
@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct ebt_ip6_info *info = par->matchinfo;
const struct ipv6hdr *ih6;
struct ipv6hdr _ip6h;
const struct tcpudphdr *pptr;
struct tcpudphdr _ports;
const union pkthdr *pptr;
union pkthdr _pkthdr;
ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
if (ih6 == NULL)
@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
return false;
if (!(info->bitmask & EBT_IP6_DPORT) &&
!(info->bitmask & EBT_IP6_SPORT))
if (!(info->bitmask & ( EBT_IP6_DPORT |
EBT_IP6_SPORT | EBT_IP6_ICMP6)))
return true;
pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
&_ports);
/* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
&_pkthdr);
if (pptr == NULL)
return false;
if (info->bitmask & EBT_IP6_DPORT) {
u32 dst = ntohs(pptr->dst);
u16 dst = ntohs(pptr->tcpudphdr.dst);
if (FWINV(dst < info->dport[0] ||
dst > info->dport[1], EBT_IP6_DPORT))
return false;
}
if (info->bitmask & EBT_IP6_SPORT) {
u32 src = ntohs(pptr->src);
u16 src = ntohs(pptr->tcpudphdr.src);
if (FWINV(src < info->sport[0] ||
src > info->sport[1], EBT_IP6_SPORT))
return false;
}
return true;
if ((info->bitmask & EBT_IP6_ICMP6) &&
FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
pptr->icmphdr.type > info->icmpv6_type[1] ||
pptr->icmphdr.code < info->icmpv6_code[0] ||
pptr->icmphdr.code > info->icmpv6_code[1],
EBT_IP6_ICMP6))
return false;
}
return true;
}
@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
return -EINVAL;
if (info->bitmask & EBT_IP6_ICMP6) {
if ((info->invflags & EBT_IP6_PROTO) ||
info->protocol != IPPROTO_ICMPV6)
return -EINVAL;
if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
info->icmpv6_code[0] > info->icmpv6_code[1])
return -EINVAL;
}
return 0;
}

View File

@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info,
newinfo->entries_size = size;
xt_compat_init_offsets(AF_INET, info->nentries);
return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
entries, newinfo);
}

View File

@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE
handled by the klogd daemon which is responsible for kernel messages
("man klogd").
config IP_ROUTE_CLASSID
bool
config IP_PNP
bool "IP: kernel level autoconfiguration"
help
@ -657,4 +660,3 @@ config TCP_MD5SIG
on the Internet.
If unsure, say N.

View File

@ -41,12 +41,12 @@ struct fib4_rule {
__be32 srcmask;
__be32 dst;
__be32 dstmask;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
u32 tclassid;
#endif
};
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
u32 fib_rules_tclass(struct fib_result *res)
{
return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (frh->dst_len)
rule4->dst = nla_get_be32(tb[FRA_DST]);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW])
rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
#endif
@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->tos && (rule4->tos != frh->tos))
return 0;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
#endif
@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
if (rule4->src_len)
NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (rule4->tclassid)
NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
#endif

View File

@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid != onh->nh_tclassid ||
#endif
((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
#endif
@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla && nla_get_be32(nla) != nh->nh_gw)
return 1;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
if (nla && nla_get_u32(nla) != nh->nh_tclassid)
return 1;
@ -779,7 +779,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto err_inval;
if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
goto err_inval;
#endif
@ -792,7 +792,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
nh->nh_oif = cfg->fc_oif;
nh->nh_gw = cfg->fc_gw;
nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid = cfg->fc_flow;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@ -1002,7 +1002,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
if (fi->fib_nh->nh_oif)
NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (fi->fib_nh[0].nh_tclassid)
NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
#endif
@ -1027,7 +1027,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
if (nh->nh_gw)
NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (nh->nh_tclassid)
NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
#endif

View File

@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
}
}
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (unlikely(skb_dst(skb)->tclassid)) {
struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
u32 idx = skb_dst(skb)->tclassid;

View File

@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support"
depends on NF_NAT
depends on NF_CONNTRACK_SNMP && NF_NAT
depends on NETFILTER_ADVANCED
default NF_NAT && NF_CONNTRACK_SNMP
---help---
This module implements an Application Layer Gateway (ALG) for

View File

@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
xt_compat_init_offsets(NFPROTO_ARP, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name,
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
xt_compat_init_offsets(NFPROTO_ARP, number);
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, total_size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,

View File

@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
xt_compat_init_offsets(AF_INET, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net,
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
xt_compat_init_offsets(AF_INET, number);
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, total_size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,

View File

@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
* that the ->target() function isn't called after ->destroy() */
ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL) {
pr_info("no conntrack!\n");
/* FIXME: need to drop invalid ones, since replies
* to outgoing connections of other nodes will be
* marked as INVALID */
if (ct == NULL)
return NF_DROP;
}
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */

View File

@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
}
#endif
/* MAC logging for input path only. */
if (in && !out)
if (in != NULL)
dump_mac_header(m, loginfo, skb);
dump_packet(m, loginfo, skb, 0);

View File

@ -20,6 +20,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <linux/rculist_nulls.h>
struct ct_iter_state {
struct seq_net_private p;
@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
for (st->bucket = 0;
st->bucket < net->ct.htable_size;
st->bucket++) {
n = rcu_dereference(net->ct.hash[st->bucket].first);
n = rcu_dereference(
hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
head = rcu_dereference(head->next);
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
if (++st->bucket >= net->ct.htable_size)
return NULL;
}
head = rcu_dereference(net->ct.hash[st->bucket].first);
head = rcu_dereference(
hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
}
return head;
}
@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
n = rcu_dereference(
hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
if (n)
return n;
}
@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
head = rcu_dereference(head->next);
head = rcu_dereference(hlist_next_rcu(head));
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
head = rcu_dereference(
hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
}
return head;
}

View File

@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
/* Try to get same port: if not, try to change it. */
for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
int ret;
int res;
exp->tuple.dst.u.tcp.port = htons(port);
ret = nf_ct_expect_related(exp);
if (ret == 0)
res = nf_ct_expect_related(exp);
if (res == 0)
break;
else if (ret != -EBUSY) {
else if (res != -EBUSY) {
port = 0;
break;
}

View File

@ -323,9 +323,9 @@ nf_nat_setup_info(struct nf_conn *ct,
/* It's done. */
if (maniptype == IP_NAT_MANIP_DST)
set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
ct->status |= IPS_DST_NAT_DONE;
else
set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
ct->status |= IPS_SRC_NAT_DONE;
return NF_ACCEPT;
}
@ -502,7 +502,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
int ret = 0;
spin_lock_bh(&nf_nat_lock);
if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
if (rcu_dereference_protected(
nf_nat_protos[proto->protonum],
lockdep_is_held(&nf_nat_lock)
) != &nf_nat_unknown_protocol) {
ret = -EBUSY;
goto out;
}
@ -679,8 +682,7 @@ static int __net_init nf_nat_net_init(struct net *net)
{
/* Leave them the same for the moment. */
net->ipv4.nat_htable_size = net->ct.htable_size;
net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
&net->ipv4.nat_vmalloced, 0);
net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
if (!net->ipv4.nat_bysource)
return -ENOMEM;
return 0;
@ -702,8 +704,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
nf_ct_iterate_cleanup(net, &clean_nat, NULL);
synchronize_rcu();
nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
net->ipv4.nat_htable_size);
nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {

View File

@ -54,6 +54,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_nat_helper.h>
#include <linux/netfilter/nf_conntrack_snmp.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
{
int ret = 0;
ret = nf_conntrack_helper_register(&snmp_helper);
if (ret < 0)
return ret;
BUG_ON(nf_nat_snmp_hook != NULL);
rcu_assign_pointer(nf_nat_snmp_hook, help);
ret = nf_conntrack_helper_register(&snmp_trap_helper);
if (ret < 0) {
nf_conntrack_helper_unregister(&snmp_helper);
@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
static void __exit nf_nat_snmp_basic_fini(void)
{
nf_conntrack_helper_unregister(&snmp_helper);
rcu_assign_pointer(nf_nat_snmp_hook, NULL);
nf_conntrack_helper_unregister(&snmp_trap_helper);
}

View File

@ -514,7 +514,7 @@ static const struct file_operations rt_cpu_seq_fops = {
.release = seq_release,
};
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
struct ip_rt_acct *dst, *src;
@ -567,14 +567,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
if (!pde)
goto err2;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
if (!pde)
goto err3;
#endif
return 0;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
@ -588,7 +588,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
remove_proc_entry("rt_cache", net->proc_net_stat);
remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
remove_proc_entry("rt_acct", net->proc_net);
#endif
}
@ -1775,7 +1775,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
memcpy(addr, &src, 4);
}
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
if (!(rt->dst.tclassid & 0xFFFF))
@ -1825,7 +1825,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = FIB_RES_GW(*res);
dst_import_metrics(dst, fi->fib_metrics);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
#endif
}
@ -1835,7 +1835,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
set_class_tag(rt, fib_rules_tclass(res));
#endif
@ -1891,7 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
rth->rt_iif =
@ -2208,7 +2208,7 @@ out: return err;
rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
rth->rt_iif =
@ -2828,7 +2828,7 @@ static int rt_fill_info(struct net *net,
}
if (rt->dst.dev)
NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
if (rt->dst.tclassid)
NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
#endif
@ -3249,9 +3249,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
};
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_NET_CLS_ROUTE */
#endif /* CONFIG_IP_ROUTE_CLASSID */
static __initdata unsigned long rhash_entries;
static int __init set_rhash_entries(char *str)
@ -3267,7 +3267,7 @@ int __init ip_rt_init(void)
{
int rc = 0;
#ifdef CONFIG_NET_CLS_ROUTE
#ifdef CONFIG_IP_ROUTE_CLASSID
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
if (!ip_rt_acct)
panic("IP: failed to allocate ip_rt_acct\n");

View File

@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
xt_compat_init_offsets(AF_INET6, info->number);
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net,
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
xt_compat_init_offsets(AF_INET6, number);
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, total_size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,

View File

@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
in ? in->name : "",
out ? out->name : "");
/* MAC logging for input path only. */
if (in && !out)
if (in != NULL)
dump_mac_header(m, loginfo, skb);
dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);

View File

@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
static struct netns_frags nf_init_frags;
#ifdef CONFIG_SYSCTL
struct ctl_table nf_ct_frag6_sysctl_table[] = {
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
.data = &nf_init_frags.timeout,

View File

@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
If unsure, say `N'.
config NF_CONNTRACK_TIMESTAMP
bool 'Connection tracking timestamping'
depends on NETFILTER_ADVANCED
help
This option enables support for connection tracking timestamping.
This allows you to store the flow start-time and to obtain
the flow-stop time (once it has been destroyed) via Connection
tracking events.
If unsure, say `N'.
config NF_CT_PROTO_DCCP
tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
depends on EXPERIMENTAL
@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_BROADCAST
tristate
config NF_CONNTRACK_NETBIOS_NS
tristate "NetBIOS name service protocol support"
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_BROADCAST
help
NetBIOS name service requests are sent as broadcast messages from an
unprivileged port and responded to with unicast messages to the
@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_SNMP
tristate "SNMP service protocol support"
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_BROADCAST
help
SNMP service requests are sent as broadcast messages from an
unprivileged port and responded to with unicast messages to the
same port. This make them hard to firewall properly because connection
tracking doesn't deal with broadcasts. This helper tracks locally
originating SNMP service requests and the corresponding
responses. It relies on correct IP address configuration, specifically
netmask and broadcast address.
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_PPTP
tristate "PPtP protocol support"
depends on NETFILTER_ADVANCED
@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK
comment "Xtables targets"
config NETFILTER_XT_TARGET_AUDIT
tristate "AUDIT target support"
depends on AUDIT
depends on NETFILTER_ADVANCED
---help---
This option adds a 'AUDIT' target, which can be used to create
audit records for packets dropped/accepted.
To compileit as a module, choose M here. If unsure, say N.
config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
depends on IP_NF_MANGLE || IP6_NF_MANGLE
@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG
config NETFILTER_XT_TARGET_NFQUEUE
tristate '"NFQUEUE" target Support'
depends on NETFILTER_ADVANCED
select NETFILTER_NETLINK_QUEUE
help
This target replaced the old obsolete QUEUE target.
@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST
config NETFILTER_XT_MATCH_REALM
tristate '"realm" match support'
depends on NETFILTER_ADVANCED
select NET_CLS_ROUTE
select IP_ROUTE_CLASSID
help
This option adds a `realm' match, which allows you to use the realm
key from the routing subsystem inside iptables.

View File

@ -1,6 +1,7 @@
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
obj-$(CONFIG_NETFILTER) = netfilter.o
@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
# targets
obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o

View File

@ -175,13 +175,21 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
kfree_skb(skb);
ret = -(verdict >> NF_VERDICT_BITS);
ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS))
goto next_hook;
ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ECANCELED)
goto next_hook;
if (ret == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
kfree_skb(skb);
}
ret = 0;
}
rcu_read_unlock();
return ret;
@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable);
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
}
EXPORT_SYMBOL(nf_ct_attach);
void (*nf_ct_destroy)(struct nf_conntrack *);
void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
EXPORT_SYMBOL(nf_ct_destroy);
void nf_conntrack_destroy(struct nf_conntrack *nfct)

View File

@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
/* ipvs application list head */
static LIST_HEAD(ip_vs_app_list);
static DEFINE_MUTEX(__ip_vs_app_mutex);
/*
* Get an ip_vs_app object
*/
@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
struct ip_vs_protocol *pp;
struct ip_vs_app *inc;
@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
}
}
ret = pp->register_app(inc);
ret = pp->register_app(net, inc);
if (ret)
goto out;
@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
* Release app incarnation
*/
static void
ip_vs_app_inc_release(struct ip_vs_app *inc)
ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
return;
if (pp->unregister_app)
pp->unregister_app(inc);
pp->unregister_app(net, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
* Register an application incarnation in protocol applications
*/
int
register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
struct netns_ipvs *ipvs = net_ipvs(net);
int result;
mutex_lock(&__ip_vs_app_mutex);
mutex_lock(&ipvs->app_mutex);
result = ip_vs_app_inc_new(app, proto, port);
result = ip_vs_app_inc_new(net, app, proto, port);
mutex_unlock(&__ip_vs_app_mutex);
mutex_unlock(&ipvs->app_mutex);
return result;
}
@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
/*
* ip_vs_app registration routine
*/
int register_ip_vs_app(struct ip_vs_app *app)
int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
mutex_lock(&__ip_vs_app_mutex);
mutex_lock(&ipvs->app_mutex);
list_add(&app->a_list, &ip_vs_app_list);
list_add(&app->a_list, &ipvs->app_list);
mutex_unlock(&__ip_vs_app_mutex);
mutex_unlock(&ipvs->app_mutex);
return 0;
}
@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
*/
void unregister_ip_vs_app(struct ip_vs_app *app)
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *inc, *nxt;
mutex_lock(&__ip_vs_app_mutex);
mutex_lock(&ipvs->app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
ip_vs_app_inc_release(inc);
ip_vs_app_inc_release(net, inc);
}
list_del(&app->a_list);
mutex_unlock(&__ip_vs_app_mutex);
mutex_unlock(&ipvs->app_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
/*
* Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
*/
int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
int ip_vs_bind_app(struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
return pp->app_conn_bind(cp);
}
@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
* /proc/net/ip_vs_app entry function
*/
static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
struct ip_vs_app *app, *inc;
list_for_each_entry(app, &ip_vs_app_list, a_list) {
list_for_each_entry(app, &ipvs->app_list, a_list) {
list_for_each_entry(inc, &app->incs_list, a_list) {
if (pos-- == 0)
return inc;
@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
mutex_lock(&__ip_vs_app_mutex);
struct net *net = seq_file_net(seq);
struct netns_ipvs *ipvs = net_ipvs(net);
return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
mutex_lock(&ipvs->app_mutex);
return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_app *inc, *app;
struct list_head *e;
struct net *net = seq_file_net(seq);
struct netns_ipvs *ipvs = net_ipvs(net);
++*pos;
if (v == SEQ_START_TOKEN)
return ip_vs_app_idx(0);
return ip_vs_app_idx(ipvs, 0);
inc = v;
app = inc->app;
@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return list_entry(e, struct ip_vs_app, a_list);
/* go on to next application */
for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
app = list_entry(e, struct ip_vs_app, a_list);
list_for_each_entry(inc, &app->incs_list, a_list) {
return inc;
@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
mutex_unlock(&__ip_vs_app_mutex);
struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
mutex_unlock(&ipvs->app_mutex);
}
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
static int ip_vs_app_open(struct inode *inode, struct file *file)
{
return seq_open(file, &ip_vs_app_seq_ops);
return seq_open_net(inode, file, &ip_vs_app_seq_ops,
sizeof(struct seq_net_private));
}
static const struct file_operations ip_vs_app_fops = {
@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = {
};
#endif
static int __net_init __ip_vs_app_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
INIT_LIST_HEAD(&ipvs->app_list);
__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
return 0;
}
static void __net_exit __ip_vs_app_cleanup(struct net *net)
{
proc_net_remove(net, "ip_vs_app");
}
static struct pernet_operations ip_vs_app_ops = {
.init = __ip_vs_app_init,
.exit = __ip_vs_app_cleanup,
};
int __init ip_vs_app_init(void)
{
/* we will replace it with proc_net_ipvs_create() soon */
proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
return 0;
int rv;
rv = register_pernet_subsys(&ip_vs_app_ops);
return rv;
}
void ip_vs_app_cleanup(void)
{
proc_net_remove(&init_net, "ip_vs_app");
unregister_pernet_subsys(&ip_vs_app_ops);
}

View File

@ -48,35 +48,32 @@
/*
* Connection hash size. Default is what was selected at compile time.
*/
int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
/* size and mask values */
int ip_vs_conn_tab_size;
int ip_vs_conn_tab_mask;
int ip_vs_conn_tab_size __read_mostly;
static int ip_vs_conn_tab_mask __read_mostly;
/*
* Connection hash table: for input and output packets lookups of IPVS
*/
static struct list_head *ip_vs_conn_tab;
static struct list_head *ip_vs_conn_tab __read_mostly;
/* SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
/* counter for current IPVS connections */
static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
/* counter for no client port connections */
static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
/* random value for IPVS connection hash */
static unsigned int ip_vs_conn_rnd;
static unsigned int ip_vs_conn_rnd __read_mostly;
/*
* Fine locking granularity for big connection hash table
*/
#define CT_LOCKARRAY_BITS 4
#define CT_LOCKARRAY_BITS 5
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
const union nf_inet_addr *addr,
__be16 port)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd)
& ip_vs_conn_tab_mask;
return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd) ^
((size_t)net>>8)) & ip_vs_conn_tab_mask;
#endif
return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd)
& ip_vs_conn_tab_mask;
return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd) ^
((size_t)net>>8)) & ip_vs_conn_tab_mask;
}
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
port = p->vport;
}
return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
}
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_conn_param p;
ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
NULL, 0, &p);
ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
&cp->caddr, cp->cport, NULL, 0, &p);
if (cp->dest && cp->dest->svc->pe) {
p.pe = cp->dest->svc->pe;
if (cp->pe) {
p.pe = cp->pe;
p.pe_data = cp->pe_data;
p.pe_data_len = cp->pe_data_len;
}
@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
}
/*
* Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
* Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
* returns bool success.
*/
static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
p->cport == cp->cport && p->vport == cp->vport &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
p->cport == cp->cport && p->vport == cp->vport &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
p->protocol == cp->protocol) {
p->protocol == cp->protocol &&
ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ct_read_unlock(hash);
@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
struct net *net = skb_net(skb);
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
return 1;
if (likely(!inverse))
ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
&iph->daddr, pptr[1], p);
ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
pptr[0], &iph->daddr, pptr[1], p);
else
ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
&iph->saddr, pptr[0], p);
ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
pptr[1], &iph->saddr, pptr[0], p);
return 0;
}
struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (!ip_vs_conn_net_eq(cp, p->net))
continue;
if (p->pe_data && p->pe->ct_match) {
if (p->pe->ct_match(p, cp))
if (p->pe == cp->pe && p->pe->ct_match(p, cp))
goto out;
continue;
}
@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
p->vport == cp->cport && p->cport == cp->dport &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
p->vport == cp->cport && p->cport == cp->dport &&
p->protocol == cp->protocol) {
p->protocol == cp->protocol &&
ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ret = cp;
@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
&cp->vaddr, cp->vport,
cp->protocol);
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest);
return dest;
} else
@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
int ip_vs_check_template(struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
/*
* Checking the dest server status.
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
(sysctl_ip_vs_expire_quiescent_template &&
(ipvs->sysctl_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
cp->timeout = 60*HZ;
@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data)
if (cp->flags & IP_VS_CONN_F_NFCT)
ip_vs_conn_drop_conntrack(cp);
ip_vs_pe_put(cp->pe);
kfree(cp->pe_data);
if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
atomic_dec(&ip_vs_conn_count);
atomic_dec(&ipvs->conn_count);
kmem_cache_free(ip_vs_conn_cachep, cp);
return;
@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
struct ip_vs_conn *
ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
struct ip_vs_dest *dest)
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
struct netns_ipvs *ipvs = net_ipvs(p->net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
p->protocol);
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
INIT_LIST_HEAD(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->protocol = p->protocol;
ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
&cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
cp->fwmark = fwmark;
if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
ip_vs_pe_get(p->pe);
cp->pe = p->pe;
cp->pe_data = p->pe_data;
cp->pe_data_len = p->pe_data_len;
}
@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
atomic_inc(&ip_vs_conn_count);
atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
#endif
ip_vs_bind_xmit(cp);
if (unlikely(pp && atomic_read(&pp->appcnt)))
ip_vs_bind_app(cp, pp);
if (unlikely(pd && atomic_read(&pd->appcnt)))
ip_vs_bind_app(cp, pd->pp);
/*
* Allow conntrack to be preserved. By default, conntrack
@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
* IP_VS_CONN_F_ONE_PACKET too.
*/
if (ip_vs_conntrack_enabled())
if (ip_vs_conntrack_enabled(ipvs))
cp->flags |= IP_VS_CONN_F_NFCT;
/* Hash it in the ip_vs_conn_tab finally */
@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
* /proc/net/ip_vs_conn entries
*/
#ifdef CONFIG_PROC_FS
struct ip_vs_iter_state {
struct seq_net_private p;
struct list_head *l;
};
static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
{
int idx;
struct ip_vs_conn *cp;
struct ip_vs_iter_state *iter = seq->private;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (pos-- == 0) {
seq->private = &ip_vs_conn_tab[idx];
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
}
@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{
seq->private = NULL;
struct ip_vs_iter_state *iter = seq->private;
iter->l = NULL;
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
}
static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
struct list_head *e, *l = seq->private;
struct ip_vs_iter_state *iter = seq->private;
struct list_head *e, *l = iter->l;
int idx;
++*pos;
@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
while (++idx < ip_vs_conn_tab_size) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
seq->private = &ip_vs_conn_tab[idx];
iter->l = &ip_vs_conn_tab[idx];
return cp;
}
ct_read_unlock_bh(idx);
}
seq->private = NULL;
iter->l = NULL;
return NULL;
}
static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
{
struct list_head *l = seq->private;
struct ip_vs_iter_state *iter = seq->private;
struct list_head *l = iter->l;
if (l)
ct_read_unlock_bh(l - ip_vs_conn_tab);
@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
else {
const struct ip_vs_conn *cp = v;
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
if (cp->dest && cp->pe_data &&
cp->dest->svc->pe->show_pe_data) {
if (!ip_vs_conn_net_eq(cp, net))
return 0;
if (cp->pe_data) {
pe_data[0] = ' ';
len = strlen(cp->dest->svc->pe->name);
memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
len = strlen(cp->pe->name);
memcpy(pe_data + 1, cp->pe->name, len);
pe_data[len + 1] = ' ';
len += 2;
len += cp->dest->svc->pe->show_pe_data(cp,
pe_data + len);
len += cp->pe->show_pe_data(cp, pe_data + len);
}
pe_data[len] = '\0';
@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
static int ip_vs_conn_open(struct inode *inode, struct file *file)
{
return seq_open(file, &ip_vs_conn_seq_ops);
return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_fops = {
@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
else {
const struct ip_vs_conn *cp = v;
struct net *net = seq_file_net(seq);
if (!ip_vs_conn_net_eq(cp, net))
return 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
{
return seq_open(file, &ip_vs_conn_sync_seq_ops);
return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_sync_fops = {
@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
}
/* Called from keventd and must protect itself from softirqs */
void ip_vs_random_dropentry(void)
void ip_vs_random_dropentry(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void)
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
if (!ip_vs_conn_net_eq(cp, net))
continue;
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
case IP_VS_TCP_S_SYN_RECV:
@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void)
/*
* Flush all the connection entries in the ip_vs_conn_tab
*/
static void ip_vs_conn_flush(void)
static void ip_vs_conn_flush(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
/*
* Lock is actually needed in this loop.
@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void)
ct_write_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (!ip_vs_conn_net_eq(cp, net))
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
if (cp->control) {
@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void)
/* the counter may be not NULL, because maybe some conn entries
are run by slow timer handler or unhashed but still referred */
if (atomic_read(&ip_vs_conn_count) != 0) {
if (atomic_read(&ipvs->conn_count) != 0) {
schedule();
goto flush_again;
}
}
/*
* per netns init and exit
*/
int __net_init __ip_vs_conn_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
atomic_set(&ipvs->conn_count, 0);
proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
return 0;
}
static void __net_exit __ip_vs_conn_cleanup(struct net *net)
{
/* flush all the connection entries first */
ip_vs_conn_flush(net);
proc_net_remove(net, "ip_vs_conn");
proc_net_remove(net, "ip_vs_conn_sync");
}
static struct pernet_operations ipvs_conn_ops = {
.init = __ip_vs_conn_init,
.exit = __ip_vs_conn_cleanup,
};
int __init ip_vs_conn_init(void)
{
int idx;
int retc;
/* Compute size and mask */
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void)
rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
}
proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
retc = register_pernet_subsys(&ipvs_conn_ops);
/* calculate the random value for connection hash */
get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
return 0;
return retc;
}
void ip_vs_conn_cleanup(void)
{
/* flush all the connection entries first */
ip_vs_conn_flush();
unregister_pernet_subsys(&ipvs_conn_ops);
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
proc_net_remove(&init_net, "ip_vs_conn");
proc_net_remove(&init_net, "ip_vs_conn_sync");
vfree(ip_vs_conn_tab);
}

View File

@ -41,6 +41,7 @@
#include <net/icmp.h> /* for icmp_send */
#include <net/route.h>
#include <net/ip6_checksum.h>
#include <net/netns/generic.h> /* net_generic() */
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
int ip_vs_net_id __read_mostly;
#ifdef IP_VS_GENERIC_NETNS
EXPORT_SYMBOL(ip_vs_net_id);
#endif
/* netns cnt used for uniqueness */
static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
/* ID used in ICMP lookups */
#define icmp_id(icmph) (((icmph)->un).echo.id)
@ -108,21 +115,28 @@ static inline void
ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
spin_lock(&dest->stats.lock);
dest->stats.ustats.inpkts++;
dest->stats.ustats.inbytes += skb->len;
spin_unlock(&dest->stats.lock);
struct ip_vs_cpu_stats *s;
spin_lock(&dest->svc->stats.lock);
dest->svc->stats.ustats.inpkts++;
dest->svc->stats.ustats.inbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
spin_lock(&ip_vs_stats.lock);
ip_vs_stats.ustats.inpkts++;
ip_vs_stats.ustats.inbytes += skb->len;
spin_unlock(&ip_vs_stats.lock);
s = this_cpu_ptr(dest->svc->stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@ -131,21 +145,28 @@ static inline void
ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
spin_lock(&dest->stats.lock);
dest->stats.ustats.outpkts++;
dest->stats.ustats.outbytes += skb->len;
spin_unlock(&dest->stats.lock);
struct ip_vs_cpu_stats *s;
spin_lock(&dest->svc->stats.lock);
dest->svc->stats.ustats.outpkts++;
dest->svc->stats.ustats.outbytes += skb->len;
spin_unlock(&dest->svc->stats.lock);
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
spin_lock(&ip_vs_stats.lock);
ip_vs_stats.ustats.outpkts++;
ip_vs_stats.ustats.outbytes += skb->len;
spin_unlock(&ip_vs_stats.lock);
s = this_cpu_ptr(dest->svc->stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
static inline void
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
spin_lock(&cp->dest->stats.lock);
cp->dest->stats.ustats.conns++;
spin_unlock(&cp->dest->stats.lock);
struct netns_ipvs *ipvs = net_ipvs(svc->net);
struct ip_vs_cpu_stats *s;
spin_lock(&svc->stats.lock);
svc->stats.ustats.conns++;
spin_unlock(&svc->stats.lock);
s = this_cpu_ptr(cp->dest->stats.cpustats);
s->ustats.conns++;
spin_lock(&ip_vs_stats.lock);
ip_vs_stats.ustats.conns++;
spin_unlock(&ip_vs_stats.lock);
s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.conns++;
s = this_cpu_ptr(ipvs->cpustats);
s->ustats.conns++;
}
static inline int
ip_vs_set_state(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
struct ip_vs_protocol *pp)
struct ip_vs_proto_data *pd)
{
if (unlikely(!pp->state_transition))
if (unlikely(!pd->pp->state_transition))
return 0;
return pp->state_transition(cp, direction, skb, pp);
return pd->pp->state_transition(cp, direction, skb, pd);
}
static inline void
static inline int
ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
struct sk_buff *skb, int protocol,
const union nf_inet_addr *caddr, __be16 cport,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
vport, p);
p->pe = svc->pe;
if (p->pe && p->pe->fill_param)
p->pe->fill_param(p, skb);
return p->pe->fill_param(p, skb);
return 0;
}
/*
@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
struct sk_buff *skb,
__be16 ports[2])
__be16 src_port, __be16 dst_port, int *ignored)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
__be16 vport = 0;
if (ports[1] == svc->port) {
if (dst_port == svc->port) {
/* non-FTP template:
* <protocol, caddr, 0, vaddr, vport, daddr, dport>
* FTP template:
* <protocol, caddr, 0, vaddr, 0, daddr, 0>
*/
if (svc->port != FTPPORT)
vport = ports[1];
vport = dst_port;
} else {
/* Note: persistent fwmark-based services and
* persistent port zero service are handled here.
@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
vaddr = &fwmark;
}
}
ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
vaddr, vport, &param);
/* return *ignored = -1 so NF_DROP can be used */
if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
vaddr, vport, &param) < 0) {
*ignored = -1;
return NULL;
}
}
/* Check if a template already exists */
ct = ip_vs_ct_in_get(&param);
if (!ct || !ip_vs_check_template(ct)) {
/* No template found or the dest of the connection
/*
* No template found or the dest of the connection
* template is not available.
* return *ignored=0 i.e. ICMP and NF_DROP
*/
dest = svc->scheduler->schedule(svc, skb);
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
*ignored = 0;
return NULL;
}
if (ports[1] == svc->port && svc->port != FTPPORT)
if (dst_port == svc->port && svc->port != FTPPORT)
dport = dest->port;
/* Create a template
@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* and thus param.pe_data will be destroyed
* when the template expires */
ct = ip_vs_conn_new(&param, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest);
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
*ignored = -1;
return NULL;
}
@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
kfree(param.pe_data);
}
dport = ports[1];
dport = dst_port;
if (dport == svc->port && dest->port)
dport = dest->port;
@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
&iph.daddr, ports[1], &param);
cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
src_port, &iph.daddr, dst_port, &param);
cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
*ignored = -1;
return NULL;
}
@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* It selects a server according to the virtual service, and
* creates a connection entry.
* Protocols supported: TCP, UDP
*
* Usage of *ignored
*
* 1 : protocol tried to schedule (eg. on SYN), found svc but the
* svc/scheduler decides that this packet should be accepted with
* NF_ACCEPT because it must not be scheduled.
*
* 0 : scheduler can not find destination, so try bypass or
* return ICMP and then NF_DROP (ip_vs_leave).
*
* -1 : scheduler tried to schedule but fatal error occurred, eg.
* ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
* failure such as missing Call-ID, ENOMEM on skb_linearize
* or pe_data. In this case we should return NF_DROP without
* any attempts to send ICMP with ip_vs_leave.
*/
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp, int *ignored)
struct ip_vs_proto_data *pd, int *ignored)
{
struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
/*
* Do not schedule replies from local real server. It is risky
* for fwmark services but mostly for persistent services.
* Do not schedule replies from local real server.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
(svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
(cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
(cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
__ip_vs_conn_put(cp);
@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Persistent service
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
*ignored = 0;
return ip_vs_sched_persist(svc, skb, pptr);
}
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
*ignored = 0;
/*
* Non-persistent service
@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return NULL;
}
*ignored = 0;
dest = svc->scheduler->schedule(svc, skb);
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
*/
{
struct ip_vs_conn_param p;
ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
pptr[0], &iph.daddr, pptr[1], &p);
ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
&iph.saddr, pptr[0], &iph.daddr, pptr[1],
&p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest);
if (!cp)
flags, dest, skb->mark);
if (!cp) {
*ignored = -1;
return NULL;
}
}
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* no destination is available for a new connection.
*/
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_protocol *pp)
struct ip_vs_proto_data *pd)
{
struct net *net;
struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
int unicast;
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_service_put(svc);
return NF_DROP;
}
net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
ipvs = net_ipvs(net);
if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
ip_vs_conn_fill_param(svc->af, iph.protocol,
ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
&iph.saddr, pptr[0],
&iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
NULL);
NULL, skb->mark);
if (!cp)
return NF_DROP;
}
@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_in_stats(cp, skb);
/* set state */
cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
/* transmit the first SYN packet */
ret = cp->packet_xmit(skb, cp, pp);
ret = cp->packet_xmit(skb, cp, pd->pp);
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
struct netns_ipvs *ipvs;
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (!skb_make_writable(skb, offset))
goto out;
ipvs = net_ipvs(skb_net(skb));
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto out;
} else
#endif
if ((sysctl_ip_vs_snat_reroute ||
if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto out;
@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
* Used for NAT and local client.
*/
static unsigned int
handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
struct ip_vs_protocol *pp = pd->pp;
struct netns_ipvs *ipvs;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* if it came from this machine itself. So re-compute
* the routing information.
*/
ipvs = net_ipvs(skb_net(skb));
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto drop;
} else
#endif
if ((sysctl_ip_vs_snat_reroute ||
if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT))
ip_vs_notrack(skb);
@ -999,9 +1062,12 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
{
struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs;
EnterFunction(11);
@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
net = skb_net(skb);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp))
pd = ip_vs_proto_data_get(net, iph.protocol);
if (unlikely(!pd))
return NF_ACCEPT;
pp = pd->pp;
/* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6
@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
/*
* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
ipvs = net_ipvs(net);
if (likely(cp))
return handle_response(af, skb, pp, cp, iph.len);
if (sysctl_ip_vs_nat_icmp_send &&
return handle_response(af, skb, pd, cp, iph.len);
if (ipvs->sysctl_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(af, iph.protocol,
if (ip_vs_lookup_real_service(net, af, iph.protocol,
&iph.saddr,
pptr[0])) {
/*
@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
static int
ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
{
struct net *net = NULL;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
unsigned int offset, ihl, verdict;
union nf_inet_addr snet;
@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
pp = ip_vs_proto_get(cih->protocol);
if (!pp)
net = skb_net(skb);
pd = ip_vs_proto_data_get(net, cih->protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
/* Is the embedded protocol header present? */
if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (cp) {
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet,
@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
static int
ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
{
struct net *net = NULL;
struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
struct ipv6hdr _ciph, *cih; /* The ip header contained
@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
unsigned int offset, verdict;
union nf_inet_addr snet;
struct rt6_info *rt;
@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
pp = ip_vs_proto_get(cih->nexthdr);
if (!pp)
net = skb_net(skb);
pd = ip_vs_proto_data_get(net, cih->nexthdr);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
/* Is the embedded protocol header present? */
/* TODO: we don't support fragmentation at the moment anyways */
@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (cp) {
ipv6_addr_copy(&snet.in6, &iph->saddr);
return handle_response_icmp(AF_INET6, skb, &snet,
@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
{
struct net *net;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, restart, pkts;
struct netns_ipvs *ipvs;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
net = skb_net(skb);
/* Protocol supported? */
pp = ip_vs_proto_get(iph.protocol);
if (unlikely(!pp))
pd = ip_vs_proto_data_get(net, iph.protocol);
if (unlikely(!pd))
return NF_ACCEPT;
pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
*/
cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
if (unlikely(!cp)) {
int v;
if (!pp->conn_schedule(af, skb, pp, &v, &cp))
if (!pp->conn_schedule(af, skb, pd, &v, &cp))
return v;
}
@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
net = skb_net(skb);
ipvs = net_ipvs(net);
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
if (sysctl_ip_vs_expire_nodest_conn) {
if (ipvs->sysctl_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
ip_vs_in_stats(cp, skb);
restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
if (cp->packet_xmit)
ret = cp->packet_xmit(skb, cp, pp);
/* do not touch skb anymore */
@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*
* Sync connection if it is about to close to
* encorage the standby servers to update the connections timeout
*
* For ONE_PKT let ip_vs_sync_conn() do the filter work.
*/
pkts = atomic_add_return(1, &cp->in_pkts);
if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
pkts = ipvs->sysctl_sync_threshold[0];
else
pkts = atomic_add_return(1, &cp->in_pkts);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(pkts % sysctl_ip_vs_sync_threshold[1]
== sysctl_ip_vs_sync_threshold[0])) ||
(pkts % ipvs->sysctl_sync_threshold[1]
== ipvs->sysctl_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
ip_vs_sync_conn(cp);
ip_vs_sync_conn(net, cp);
goto out;
}
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
else if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % sysctl_ip_vs_sync_threshold[1]
== sysctl_ip_vs_sync_threshold[0])) ||
(pkts % ipvs->sysctl_sync_threshold[1]
== ipvs->sysctl_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
ip_vs_sync_conn(net, cp);
out:
cp->old_state = cp->state;
@ -1782,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
},
#endif
};
/*
* Initialize IP Virtual Server netns mem.
*/
static int __net_init __ip_vs_init(struct net *net)
{
struct netns_ipvs *ipvs;
ipvs = net_generic(net, ip_vs_net_id);
if (ipvs == NULL) {
pr_err("%s(): no memory.\n", __func__);
return -ENOMEM;
}
ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
atomic_inc(&ipvs_netns_cnt);
net->ipvs = ipvs;
printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n",
sizeof(struct netns_ipvs), ipvs->gen);
return 0;
}
static void __net_exit __ip_vs_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen);
}
static struct pernet_operations ipvs_core_ops = {
.init = __ip_vs_init,
.exit = __ip_vs_cleanup,
.id = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
};
/*
* Initialize IP Virtual Server
@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void)
{
int ret;
ip_vs_estimator_init();
ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */
if (ret < 0)
return ret;
ip_vs_estimator_init();
ret = ip_vs_control_init();
if (ret < 0) {
pr_err("can't setup control.\n");
@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void)
goto cleanup_app;
}
ret = ip_vs_sync_init();
if (ret < 0) {
pr_err("can't setup sync data.\n");
goto cleanup_conn;
}
ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
if (ret < 0) {
pr_err("can't register hooks.\n");
goto cleanup_conn;
goto cleanup_sync;
}
pr_info("ipvs loaded.\n");
return ret;
cleanup_sync:
ip_vs_sync_cleanup();
cleanup_conn:
ip_vs_conn_cleanup();
cleanup_app:
@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void)
ip_vs_control_cleanup();
cleanup_estimator:
ip_vs_estimator_cleanup();
unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
return ret;
}
static void __exit ip_vs_cleanup(void)
{
nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
ip_vs_sync_cleanup();
ip_vs_conn_cleanup();
ip_vs_app_cleanup();
ip_vs_protocol_cleanup();
ip_vs_control_cleanup();
ip_vs_estimator_cleanup();
unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */
pr_info("ipvs unloaded.\n");
}

File diff suppressed because it is too large Load Diff

View File

@ -8,8 +8,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Changes:
*
* Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
* Network name space (netns) aware.
* Global data moved to netns i.e struct netns_ipvs
* Affected data: est_list and est_lock.
* estimation_timer() runs with timer per netns.
* get_stats()) do the per cpu summing.
*/
#define KMSG_COMPONENT "IPVS"
@ -48,11 +52,42 @@
*/
static void estimation_timer(unsigned long arg);
/*
* Make a summary from each cpu
*/
static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
struct ip_vs_cpu_stats *stats)
{
int i;
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
__u64 inbytes, outbytes;
if (i) {
sum->conns += s->ustats.conns;
sum->inpkts += s->ustats.inpkts;
sum->outpkts += s->ustats.outpkts;
do {
start = u64_stats_fetch_begin_bh(&s->syncp);
inbytes = s->ustats.inbytes;
outbytes = s->ustats.outbytes;
} while (u64_stats_fetch_retry_bh(&s->syncp, start));
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
sum->conns = s->ustats.conns;
sum->inpkts = s->ustats.inpkts;
sum->outpkts = s->ustats.outpkts;
do {
start = u64_stats_fetch_begin_bh(&s->syncp);
sum->inbytes = s->ustats.inbytes;
sum->outbytes = s->ustats.outbytes;
} while (u64_stats_fetch_retry_bh(&s->syncp, start));
}
}
}
static LIST_HEAD(est_list);
static DEFINE_SPINLOCK(est_lock);
static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
static void estimation_timer(unsigned long arg)
{
@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg)
u32 n_inpkts, n_outpkts;
u64 n_inbytes, n_outbytes;
u32 rate;
struct net *net = (struct net *)arg;
struct netns_ipvs *ipvs;
spin_lock(&est_lock);
list_for_each_entry(e, &est_list, list) {
ipvs = net_ipvs(net);
ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
spin_lock(&s->lock);
n_conns = s->ustats.conns;
n_inpkts = s->ustats.inpkts;
@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg)
n_outbytes = s->ustats.outbytes;
/* scaled by 2^10, but divided 2 seconds */
rate = (n_conns - e->last_conns)<<9;
rate = (n_conns - e->last_conns) << 9;
e->last_conns = n_conns;
e->cps += ((long)rate - (long)e->cps)>>2;
s->ustats.cps = (e->cps+0x1FF)>>10;
e->cps += ((long)rate - (long)e->cps) >> 2;
s->ustats.cps = (e->cps + 0x1FF) >> 10;
rate = (n_inpkts - e->last_inpkts)<<9;
rate = (n_inpkts - e->last_inpkts) << 9;
e->last_inpkts = n_inpkts;
e->inpps += ((long)rate - (long)e->inpps)>>2;
s->ustats.inpps = (e->inpps+0x1FF)>>10;
e->inpps += ((long)rate - (long)e->inpps) >> 2;
s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
rate = (n_outpkts - e->last_outpkts)<<9;
rate = (n_outpkts - e->last_outpkts) << 9;
e->last_outpkts = n_outpkts;
e->outpps += ((long)rate - (long)e->outpps)>>2;
s->ustats.outpps = (e->outpps+0x1FF)>>10;
e->outpps += ((long)rate - (long)e->outpps) >> 2;
s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
rate = (n_inbytes - e->last_inbytes)<<4;
rate = (n_inbytes - e->last_inbytes) << 4;
e->last_inbytes = n_inbytes;
e->inbps += ((long)rate - (long)e->inbps)>>2;
s->ustats.inbps = (e->inbps+0xF)>>5;
e->inbps += ((long)rate - (long)e->inbps) >> 2;
s->ustats.inbps = (e->inbps + 0xF) >> 5;
rate = (n_outbytes - e->last_outbytes)<<4;
rate = (n_outbytes - e->last_outbytes) << 4;
e->last_outbytes = n_outbytes;
e->outbps += ((long)rate - (long)e->outbps)>>2;
s->ustats.outbps = (e->outbps+0xF)>>5;
e->outbps += ((long)rate - (long)e->outbps) >> 2;
s->ustats.outbps = (e->outbps + 0xF) >> 5;
spin_unlock(&s->lock);
}
spin_unlock(&est_lock);
mod_timer(&est_timer, jiffies + 2*HZ);
spin_unlock(&ipvs->est_lock);
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
void ip_vs_new_estimator(struct ip_vs_stats *stats)
void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
INIT_LIST_HEAD(&est->list);
@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
est->last_outbytes = stats->ustats.outbytes;
est->outbps = stats->ustats.outbps<<5;
spin_lock_bh(&est_lock);
list_add(&est->list, &est_list);
spin_unlock_bh(&est_lock);
spin_lock_bh(&ipvs->est_lock);
list_add(&est->list, &ipvs->est_list);
spin_unlock_bh(&ipvs->est_lock);
}
void ip_vs_kill_estimator(struct ip_vs_stats *stats)
void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
spin_lock_bh(&est_lock);
spin_lock_bh(&ipvs->est_lock);
list_del(&est->list);
spin_unlock_bh(&est_lock);
spin_unlock_bh(&ipvs->est_lock);
}
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
est->outbps = 0;
}
static int __net_init __ip_vs_estimator_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
INIT_LIST_HEAD(&ipvs->est_list);
spin_lock_init(&ipvs->est_lock);
setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
return 0;
}
static void __net_exit __ip_vs_estimator_exit(struct net *net)
{
del_timer_sync(&net_ipvs(net)->est_timer);
}
static struct pernet_operations ip_vs_app_ops = {
.init = __ip_vs_estimator_init,
.exit = __ip_vs_estimator_exit,
};
int __init ip_vs_estimator_init(void)
{
mod_timer(&est_timer, jiffies + 2 * HZ);
return 0;
int rv;
rv = register_pernet_subsys(&ip_vs_app_ops);
return rv;
}
void ip_vs_estimator_cleanup(void)
{
del_timer_sync(&est_timer);
unregister_pernet_subsys(&ip_vs_app_ops);
}

View File

@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
{
struct ip_vs_conn_param p;
ip_vs_conn_fill_param(AF_INET, iph->protocol,
&from, port, &cp->caddr, 0, &p);
ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
iph->protocol, &from, port,
&cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
ip_vs_conn_fill_param(ip_vs_conn_net(cp),
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
cp->dest);
cp->dest, skb->mark);
if (!n_cp)
return 0;
@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* would be adjusted twice.
*/
net = skb_net(skb);
cp->app_data = NULL;
ip_vs_tcp_conn_listen(n_cp);
ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
{
struct ip_vs_conn_param p;
ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
&cp->vaddr, htons(ntohs(cp->vport)-1),
&p);
ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
iph->protocol, &to, port, &cp->vaddr,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
n_cp = ip_vs_conn_new(&p, &cp->daddr,
htons(ntohs(cp->dport)-1),
IP_VS_CONN_F_NFCT, cp->dest);
IP_VS_CONN_F_NFCT, cp->dest,
skb->mark);
if (!n_cp)
return 0;
@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Move tunnel to listen state
*/
ip_vs_tcp_conn_listen(n_cp);
net = skb_net(skb);
ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return 1;
@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
.pkt_in = ip_vs_ftp_in,
};
/*
* ip_vs_ftp initialization
* per netns ip_vs_ftp initialization
*/
static int __init ip_vs_ftp_init(void)
static int __net_init __ip_vs_ftp_init(struct net *net)
{
int i, ret;
struct ip_vs_app *app = &ip_vs_ftp;
ret = register_ip_vs_app(app);
ret = register_ip_vs_app(net, app);
if (ret)
return ret;
for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
if (!ports[i])
continue;
ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
if (ret)
break;
pr_info("%s: loaded support on port[%d] = %d\n",
@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
}
if (ret)
unregister_ip_vs_app(app);
unregister_ip_vs_app(net, app);
return ret;
}
/*
* netns exit
*/
static void __ip_vs_ftp_exit(struct net *net)
{
struct ip_vs_app *app = &ip_vs_ftp;
unregister_ip_vs_app(net, app);
}
static struct pernet_operations ip_vs_ftp_ops = {
.init = __ip_vs_ftp_init,
.exit = __ip_vs_ftp_exit,
};
int __init ip_vs_ftp_init(void)
{
int rv;
rv = register_pernet_subsys(&ip_vs_ftp_ops);
return rv;
}
/*
* ip_vs_ftp finish.
*/
static void __exit ip_vs_ftp_exit(void)
{
unregister_ip_vs_app(&ip_vs_ftp);
unregister_pernet_subsys(&ip_vs_ftp_ops);
}

View File

@ -70,7 +70,6 @@
* entries that haven't been touched for a day.
*/
#define COUNT_FOR_FULL_EXPIRATION 30
static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
/*
@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
static ctl_table vs_vars_table[] = {
{
.procname = "lblc_expiration",
.data = &sysctl_ip_vs_lblc_expiration,
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
{ }
};
static struct ctl_table_header * sysctl_header;
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse + sysctl_ip_vs_lblc_expiration))
en->lastuse +
ipvs->sysctl_lblc_expiration))
continue;
ip_vs_lblc_free(en);
@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
.schedule = ip_vs_lblc_schedule,
};
/*
* per netns init.
*/
static int __net_init __ip_vs_lblc_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
if (!net_eq(net, &init_net)) {
ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
sizeof(vs_vars_table),
GFP_KERNEL);
if (ipvs->lblc_ctl_table == NULL)
goto err_dup;
} else
ipvs->lblc_ctl_table = vs_vars_table;
ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
ipvs->lblc_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblc_ctl_table);
if (!ipvs->lblc_ctl_header)
goto err_reg;
return 0;
err_reg:
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
err_dup:
return -ENOMEM;
}
static void __net_exit __ip_vs_lblc_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
unregister_net_sysctl_table(ipvs->lblc_ctl_header);
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
}
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
};
static int __init ip_vs_lblc_init(void)
{
int ret;
sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
ret = register_pernet_subsys(&ip_vs_lblc_ops);
if (ret)
return ret;
ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
if (ret)
unregister_sysctl_table(sysctl_header);
unregister_pernet_subsys(&ip_vs_lblc_ops);
return ret;
}
static void __exit ip_vs_lblc_cleanup(void)
{
unregister_sysctl_table(sysctl_header);
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops);
}

View File

@ -70,8 +70,6 @@
* entries that haven't been touched for a day.
*/
#define COUNT_FOR_FULL_EXPIRATION 30
static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
/*
* for IPVS lblcr entry hash table
@ -296,7 +294,7 @@ struct ip_vs_lblcr_table {
static ctl_table vs_vars_table[] = {
{
.procname = "lblcr_expiration",
.data = &sysctl_ip_vs_lblcr_expiration,
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = {
{ }
};
static struct ctl_table_header * sysctl_header;
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
list_del(&en->list);
@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
now))
if (time_after(en->lastuse
+ ipvs->sysctl_lblcr_expiration, now))
continue;
ip_vs_lblcr_free(en);
@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
sysctl_ip_vs_lblcr_expiration)) {
ipvs->sysctl_lblcr_expiration)) {
struct ip_vs_dest *m;
write_lock(&en->set.lock);
@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
.schedule = ip_vs_lblcr_schedule,
};
/*
* per netns init.
*/
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
if (!net_eq(net, &init_net)) {
ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
sizeof(vs_vars_table),
GFP_KERNEL);
if (ipvs->lblcr_ctl_table == NULL)
goto err_dup;
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
ipvs->lblcr_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblcr_ctl_table);
if (!ipvs->lblcr_ctl_header)
goto err_reg;
return 0;
err_reg:
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
err_dup:
return -ENOMEM;
}
static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
}
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
};
static int __init ip_vs_lblcr_init(void)
{
int ret;
sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
ret = register_pernet_subsys(&ip_vs_lblcr_ops);
if (ret)
return ret;
ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
if (ret)
unregister_sysctl_table(sysctl_header);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
return ret;
}
static void __exit ip_vs_lblcr_cleanup(void)
{
unregister_sysctl_table(sysctl_header);
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
}

View File

@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
struct nf_conntrack_tuple *orig, new_reply;
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
struct net *net = nf_ct_net(ct);
if (exp->tuple.src.l3num != PF_INET)
return;
@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
&tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */

View File

@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
}
/* Get pe in the pe list by name */
static struct ip_vs_pe *
ip_vs_pe_getbyname(const char *pe_name)
struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
{
struct ip_vs_pe *pe;
IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
pe_name);
spin_lock_bh(&ip_vs_pe_lock);
@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
}
/* Lookup pe and try to load it if it doesn't exist */
struct ip_vs_pe *ip_vs_pe_get(const char *name)
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
{
struct ip_vs_pe *pe;
/* Search for the pe by name */
pe = ip_vs_pe_getbyname(name);
pe = __ip_vs_pe_getbyname(name);
/* If pe not found, load the module and search again */
if (!pe) {
request_module("ip_vs_pe_%s", name);
pe = ip_vs_pe_getbyname(name);
pe = __ip_vs_pe_getbyname(name);
}
return pe;
}
void ip_vs_pe_put(struct ip_vs_pe *pe)
{
if (pe && pe->module)
module_put(pe->module);
}
/* Register a pe in the pe list */
int register_ip_vs_pe(struct ip_vs_pe *pe)
{

View File

@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
int retc;
ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
if (dataoff >= skb->len)
return -EINVAL;
if ((retc=skb_linearize(skb)) < 0)
return retc;
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;

View File

@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
return 0;
}
/*
* register an ipvs protocols netns related data
*/
static int
register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
{
struct netns_ipvs *ipvs = net_ipvs(net);
unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
struct ip_vs_proto_data *pd =
kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
if (!pd) {
pr_err("%s(): no memory.\n", __func__);
return -ENOMEM;
}
pd->pp = pp; /* For speed issues */
pd->next = ipvs->proto_data_table[hash];
ipvs->proto_data_table[hash] = pd;
atomic_set(&pd->appcnt, 0); /* Init app counter */
if (pp->init_netns != NULL)
pp->init_netns(net, pd);
return 0;
}
/*
* unregister an ipvs protocol
@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
return -ESRCH;
}
/*
* unregister an ipvs protocols netns data
*/
static int
unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data **pd_p;
unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
pd_p = &ipvs->proto_data_table[hash];
for (; *pd_p; pd_p = &(*pd_p)->next) {
if (*pd_p == pd) {
*pd_p = pd->next;
if (pd->pp->exit_netns != NULL)
pd->pp->exit_netns(net, pd);
kfree(pd);
return 0;
}
}
return -ESRCH;
}
/*
* get ip_vs_protocol object by its proto.
@ -100,19 +148,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
}
EXPORT_SYMBOL(ip_vs_proto_get);
/*
* get ip_vs_protocol object data by netns and proto
*/
struct ip_vs_proto_data *
__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
struct ip_vs_proto_data *pd;
unsigned hash = IP_VS_PROTO_HASH(proto);
for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
if (pd->pp->protocol == proto)
return pd;
}
return NULL;
}
struct ip_vs_proto_data *
ip_vs_proto_data_get(struct net *net, unsigned short proto)
{
struct netns_ipvs *ipvs = net_ipvs(net);
return __ipvs_proto_data_get(ipvs, proto);
}
EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
* Propagate event for state change to all protocols
*/
void ip_vs_protocol_timeout_change(int flags)
void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
{
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
int i;
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
if (pp->timeout_change)
pp->timeout_change(pp, flags);
for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
if (pd->pp->timeout_change)
pd->pp->timeout_change(pd, flags);
}
}
}
@ -236,6 +309,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
}
/*
* per network name-space init
*/
static int __net_init __ip_vs_protocol_init(struct net *net)
{
#ifdef CONFIG_IP_VS_PROTO_TCP
register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
#endif
#ifdef CONFIG_IP_VS_PROTO_SCTP
register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
#endif
#ifdef CONFIG_IP_VS_PROTO_AH
register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
#endif
#ifdef CONFIG_IP_VS_PROTO_ESP
register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
#endif
return 0;
}
static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd;
int i;
/* unregister all the ipvs proto data for this netns */
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
while ((pd = ipvs->proto_data_table[i]) != NULL)
unregister_ip_vs_proto_netns(net, pd);
}
}
static struct pernet_operations ipvs_proto_ops = {
.init = __ip_vs_protocol_init,
.exit = __ip_vs_protocol_cleanup,
};
int __init ip_vs_protocol_init(void)
{
@ -265,6 +378,7 @@ int __init ip_vs_protocol_init(void)
REGISTER_PROTOCOL(&ip_vs_protocol_esp);
#endif
pr_info("Registered protocols (%s)\n", &protocols[2]);
return register_pernet_subsys(&ipvs_proto_ops);
return 0;
}
@ -275,6 +389,7 @@ void ip_vs_protocol_cleanup(void)
struct ip_vs_protocol *pp;
int i;
unregister_pernet_subsys(&ipvs_proto_ops);
/* unregister all the ipvs protocols */
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
while ((pp = ip_vs_proto_table[i]) != NULL)

View File

@ -41,28 +41,30 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500
static void
ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
int inverse, struct ip_vs_conn_param *p)
ah_esp_conn_fill_param_proto(struct net *net, int af,
const struct ip_vs_iphdr *iph, int inverse,
struct ip_vs_conn_param *p)
{
if (likely(!inverse))
ip_vs_conn_fill_param(af, IPPROTO_UDP,
ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
ip_vs_conn_fill_param(af, IPPROTO_UDP,
ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
static struct ip_vs_conn *
ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
ah_esp_conn_in_get(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
struct net *net = skb_net(skb);
ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
pp->name,
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
struct net *net = skb_net(skb);
ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
pp->name,
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
static int
ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
/*
@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
}
static void ah_esp_init(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
static void ah_esp_exit(struct ip_vs_protocol *pp)
{
/* nothing to do now */
}
#ifdef CONFIG_IP_VS_PROTO_AH
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
.num_states = 1,
.dont_defrag = 1,
.init = ah_esp_init,
.exit = ah_esp_exit,
.init = NULL,
.exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
.app_conn_bind = NULL,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
.set_state_timeout = NULL,
};
#endif
@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
.protocol = IPPROTO_ESP,
.num_states = 1,
.dont_defrag = 1,
.init = ah_esp_init,
.exit = ah_esp_exit,
.init = NULL,
.exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,

View File

@ -9,9 +9,10 @@
#include <net/ip_vs.h>
static int
sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
struct net *net;
struct ip_vs_service *svc;
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
sizeof(_schunkh), &_schunkh);
if (sch == NULL)
return 0;
net = skb_net(skb);
if ((sch->type == SCTP_CID_INIT) &&
(svc = ip_vs_service_get(af, skb->mark, iph.protocol,
(svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
&iph.daddr, sh->dest))) {
int ignored;
if (ip_vs_todrop()) {
if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
if (!*cpp && !ignored) {
*verdict = ip_vs_leave(svc, skb, pp);
*cpp = ip_vs_schedule(svc, skb, pd, &ignored);
if (!*cpp && ignored <= 0) {
if (!ignored)
*verdict = ip_vs_leave(svc, skb, pd);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
}
return 0;
}
ip_vs_service_put(svc);
}
/* NF_ACCEPT */
return 1;
}
@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
/*
* Timeout table[state]
*/
static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
[IP_VS_SCTP_S_NONE] = 2 * HZ,
[IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ,
[IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ,
@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
return "?";
}
static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
{
}
static int
sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
{
return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
sctp_state_name_table, sname, to);
}
static inline int
set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, const struct sk_buff *skb)
{
sctp_chunkhdr_t _sctpch, *sch;
@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
IP_VS_DBG_BUF(8, "%s %s %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
pp->name,
pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
}
}
}
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = next_state];
else /* What to do ? */
cp->timeout = sctp_timeouts[cp->state = next_state];
cp->timeout = pp->timeout_table[cp->state = next_state];
return 1;
return 1;
}
static int
sctp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb, struct ip_vs_protocol *pp)
const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
int ret = 0;
spin_lock(&cp->lock);
ret = set_sctp_state(pp, cp, direction, skb);
ret = set_sctp_state(pd, cp, direction, skb);
spin_unlock(&cp->lock);
return ret;
}
/*
* Hash table for SCTP application incarnations
*/
#define SCTP_APP_TAB_BITS 4
#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
static DEFINE_SPINLOCK(sctp_app_lock);
static inline __u16 sctp_app_hashkey(__be16 port)
{
return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
& SCTP_APP_TAB_MASK;
}
static int sctp_register_app(struct ip_vs_app *inc)
static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
spin_lock_bh(&sctp_app_lock);
list_for_each_entry(i, &sctp_apps[hash], p_list) {
spin_lock_bh(&ipvs->sctp_app_lock);
list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
list_add(&inc->p_list, &sctp_apps[hash]);
atomic_inc(&ip_vs_protocol_sctp.appcnt);
list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
atomic_inc(&pd->appcnt);
out:
spin_unlock_bh(&sctp_app_lock);
spin_unlock_bh(&ipvs->sctp_app_lock);
return ret;
}
static void sctp_unregister_app(struct ip_vs_app *inc)
static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
spin_lock_bh(&sctp_app_lock);
atomic_dec(&ip_vs_protocol_sctp.appcnt);
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
spin_lock_bh(&ipvs->sctp_app_lock);
atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
spin_unlock_bh(&sctp_app_lock);
spin_unlock_bh(&ipvs->sctp_app_lock);
}
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = sctp_app_hashkey(cp->vport);
spin_lock(&sctp_app_lock);
list_for_each_entry(inc, &sctp_apps[hash], p_list) {
spin_lock(&ipvs->sctp_app_lock);
list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
spin_unlock(&sctp_app_lock);
spin_unlock(&ipvs->sctp_app_lock);
IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
spin_unlock(&sctp_app_lock);
spin_unlock(&ipvs->sctp_app_lock);
out:
return result;
}
static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
/* ---------------------------------------------
* timeouts is netns related now.
* ---------------------------------------------
*/
static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
{
IP_VS_INIT_HASH_TABLE(sctp_apps);
pp->timeout_table = sctp_timeouts;
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
spin_lock_init(&ipvs->tcp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts));
}
static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
struct ip_vs_protocol ip_vs_protocol_sctp = {
.name = "SCTP",
.protocol = IPPROTO_SCTP,
.num_states = IP_VS_SCTP_S_LAST,
.dont_defrag = 0,
.appcnt = ATOMIC_INIT(0),
.init = ip_vs_sctp_init,
.exit = ip_vs_sctp_exit,
.register_app = sctp_register_app,
.name = "SCTP",
.protocol = IPPROTO_SCTP,
.num_states = IP_VS_SCTP_S_LAST,
.dont_defrag = 0,
.init = NULL,
.exit = NULL,
.init_netns = __ip_vs_sctp_init,
.exit_netns = __ip_vs_sctp_exit,
.register_app = sctp_register_app,
.unregister_app = sctp_unregister_app,
.conn_schedule = sctp_conn_schedule,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = sctp_snat_handler,
.dnat_handler = sctp_dnat_handler,
.csum_check = sctp_csum_check,
.state_name = sctp_state_name,
.conn_schedule = sctp_conn_schedule,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = sctp_snat_handler,
.dnat_handler = sctp_dnat_handler,
.csum_check = sctp_csum_check,
.state_name = sctp_state_name,
.state_transition = sctp_state_transition,
.app_conn_bind = sctp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = sctp_timeout_change,
.set_state_timeout = sctp_set_state_timeout,
.app_conn_bind = sctp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL,
};

Some files were not shown because too many files have changed in this diff Show More