Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree. The most relevant updates in this batch are:

1) Add Maglev hashing support to IPVS. Moreover, store the latest server
   weight in IPVS since this is needed by Maglev, patches from Inju Song.
   A sketch of the Maglev table-population step follows the commit
   metadata below.

2) Preparatory work to add iptables flowtable support, patches
   from Felix Fietkau.

3) Hand flows back to the conntrack slow path, via the new teardown
   state, when a TCP RST/FIN packet is seen, also from Felix.

4) Add support for extended netlink error reporting for nf_tables.

5) Support for timeouts larger than 23 days in nf_tables, patch from
   Florian Westphal.

6) Always set an upper limit to dynamic sets, also from Florian.

7) Allow number generator to make map lookups, from Laura Garcia.

8) Use hash_32() instead of open-coded hashing in IPVS, from Vincent Bernat.

9) Extend ip6tables SRH match to support previous, next and last SID,
   from Ahmed Abdelsalam.

10) Move the passive OS fingerprint code to nf_osf.c, from Fernando Fernandez.

11) Expose nf_conntrack_max through ctnetlink, from Florent Fourcot.

12) Several housekeeping patches for xt_NFLOG, x_tables and ebtables,
   from Taehee Yoo.

13) Unify meta bridge with core nft_meta, then make nft_meta built-in.
   Make rt and exthdr built-in too, again from Florian.

14) Add missing initialization of tbl->entries in IPVS, from Cong Wang.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller, 2018-05-06 21:51:37 -04:00
commit 90278871d4
104 changed files with 2758 additions and 1892 deletions
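
As a rough illustration of the Maglev scheduler from item 1: every real
server derives an (offset, skip) walk over a prime-sized lookup table, and
the servers take turns claiming their next preferred free slot until the
table is full, which yields near-even load and minimal reshuffling when
servers come and go. Below is a minimal userspace sketch of the population
step, loosely following the Maglev paper; the table size, hash function and
server names are illustrative only, and the actual IPVS scheduler
additionally weighs servers (which is why it needs the last_weight field
added in this batch).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define M 65537			/* lookup table size; must be prime */
#define NSERVERS 3

/* toy string hash with a seed; the kernel uses stronger hashes */
static uint32_t hash_str(const char *s, uint32_t seed)
{
	uint32_t h = 5381 ^ seed;

	while (*s)
		h = h * 33 + (uint8_t)*s++;
	return h;
}

int main(void)
{
	static const char *server[NSERVERS] = { "rs0", "rs1", "rs2" };
	static int table[M];
	uint32_t offset[NSERVERS], skip[NSERVERS];
	uint32_t next[NSERVERS] = { 0 };
	unsigned int filled = 0;

	for (int i = 0; i < NSERVERS; i++) {
		offset[i] = hash_str(server[i], 0) % M;
		skip[i]   = hash_str(server[i], 1) % (M - 1) + 1;
	}
	memset(table, -1, sizeof(table));

	/* round robin: each server claims its next preferred free slot */
	while (filled < M) {
		for (int i = 0; i < NSERVERS && filled < M; i++) {
			uint32_t c;

			do {
				c = (offset[i] + (uint64_t)next[i] * skip[i]) % M;
				next[i]++;
			} while (table[c] >= 0);
			table[c] = i;
			filled++;
		}
	}
	/* a connection hashing to bucket h is served by server[table[h]] */
	printf("bucket 12345 -> %s\n", server[table[12345 % M]]);
	return 0;
}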


@@ -0,0 +1,27 @@
#include <uapi/linux/netfilter/nf_osf.h>
/* Initial window size option state machine: multiple of mss, mtu or
* plain numeric value. Can also be made as plain numeric value which
* is not a multiple of specified value.
*/
enum nf_osf_window_size_options {
OSF_WSS_PLAIN = 0,
OSF_WSS_MSS,
OSF_WSS_MTU,
OSF_WSS_MODULO,
OSF_WSS_MAX,
};
enum osf_fmatch_states {
/* Packet does not match the fingerprint */
FMATCH_WRONG = 0,
/* Packet matches the fingerprint */
FMATCH_OK,
/* Options do not match the fingerprint, but header does */
FMATCH_OPT_WRONG,
};
bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
int hooknum, struct net_device *in, struct net_device *out,
const struct nf_osf_info *info, struct net *net,
const struct list_head *nf_osf_fingers);
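
With the matching engine extracted here, the xt_osf module itself can
reduce to a thin wrapper around this helper, roughly as follows (a sketch
of the post-move call site; parameter plumbing abbreviated):

static bool
xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
{
	return nf_osf_match(skb, xt_family(p), xt_hooknum(p),
			    xt_in(p), xt_out(p), p->matchinfo, xt_net(p),
			    nf_osf_fingers);
}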


@@ -17,10 +17,6 @@
#include <linux/if_ether.h>
#include <uapi/linux/netfilter_bridge/ebtables.h>
/* return values for match() functions */
#define EBT_MATCH 0
#define EBT_NOMATCH 1
struct ebt_match {
struct list_head list;
const char name[EBT_FUNCTION_MAXNAMELEN];


@@ -279,6 +279,27 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
!lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
}
static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
struct inet6_dev *idev;
unsigned int mtu;
if (dst_metric_locked(dst, RTAX_MTU)) {
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
return mtu;
}
mtu = IPV6_MIN_MTU;
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
if (idev)
mtu = idev->cnf.mtu6;
rcu_read_unlock();
return mtu;
}
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb,
const void *daddr);


@@ -668,6 +668,7 @@ struct ip_vs_dest {
volatile unsigned int flags; /* dest status flags */
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
atomic_t last_weight; /* server latest weight */
refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */


@@ -960,8 +960,6 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
&inet6_sk(sk)->cork);
}
unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,


@@ -6,7 +6,7 @@
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
const struct net_device *out);
void nf_nat_masquerade_ipv4_register_notifier(void);


@@ -3,7 +3,7 @@
#define _NF_NAT_MASQUERADE_IPV6_H_
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
const struct net_device *out);
void nf_nat_masquerade_ipv6_register_notifier(void);
void nf_nat_masquerade_ipv6_unregister_notifier(void);


@@ -6,6 +6,7 @@
#include <linux/netdevice.h>
#include <linux/rhashtable.h>
#include <linux/rcupdate.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/dst.h>
struct nf_flowtable;
@@ -13,25 +14,24 @@ struct nf_flowtable;
struct nf_flowtable_type {
struct list_head list;
int family;
void (*gc)(struct work_struct *work);
int (*init)(struct nf_flowtable *ft);
void (*free)(struct nf_flowtable *ft);
const struct rhashtable_params *params;
nf_hookfn *hook;
struct module *owner;
};
struct nf_flowtable {
struct list_head list;
struct rhashtable rhashtable;
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
};
enum flow_offload_tuple_dir {
FLOW_OFFLOAD_DIR_ORIGINAL,
FLOW_OFFLOAD_DIR_REPLY,
__FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY,
FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
};
#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1)
struct flow_offload_tuple {
union {
@@ -55,6 +55,8 @@ struct flow_offload_tuple {
int oifidx;
u16 mtu;
struct dst_entry *dst_cache;
};
@@ -66,6 +68,7 @@ struct flow_offload_tuple_rhash {
#define FLOW_OFFLOAD_SNAT 0x1
#define FLOW_OFFLOAD_DNAT 0x2
#define FLOW_OFFLOAD_DYING 0x4
#define FLOW_OFFLOAD_TEARDOWN 0x8
struct flow_offload {
struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -98,11 +101,14 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);
void nf_flow_offload_work_gc(struct work_struct *work);
extern const struct rhashtable_params nf_flow_offload_rhash_params;
void flow_offload_dead(struct flow_offload *flow);
void flow_offload_teardown(struct flow_offload *flow);
static inline void flow_offload_dead(struct flow_offload *flow)
{
flow->flags |= FLOW_OFFLOAD_DYING;
}
int nf_flow_snat_port(const struct flow_offload *flow,
struct sk_buff *skb, unsigned int thoff,

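The teardown state introduced for item 3 is deliberately small: when the
offload hook sees a TCP FIN or RST it flags the flow instead of refreshing
its timeout, the garbage collector then drops the entry, and subsequent
packets take the classic conntrack path, whose TCP tracker performs the
real connection teardown. A simplified sketch assuming the shape of the
nf_flow_table code:

void flow_offload_teardown(struct flow_offload *flow)
{
	flow->flags |= FLOW_OFFLOAD_TEARDOWN;
}

/* in the forwarding hook, before refreshing flow->timeout */
if (tuplehash->tuple.l4proto == IPPROTO_TCP &&
    unlikely(tcph->fin || tcph->rst)) {
	flow_offload_teardown(flow);
	return NF_ACCEPT;	/* hand the packet back to the slow path */
}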

@@ -39,7 +39,7 @@ struct nf_conn_nat {
/* Set up the info structure to map into this range. */
unsigned int nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype);
extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,


@@ -7,7 +7,7 @@ struct nf_nat_l3proto {
u8 l3proto;
bool (*in_range)(const struct nf_conntrack_tuple *t,
const struct nf_nat_range *range);
const struct nf_nat_range2 *range);
u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16);
@@ -33,7 +33,7 @@ struct nf_nat_l3proto {
struct flowi *fl);
int (*nlattr_to_range)(struct nlattr *tb[],
struct nf_nat_range *range);
struct nf_nat_range2 *range);
};
int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
@@ -48,30 +48,26 @@ unsigned int nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
unsigned int nf_nat_ipv4_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -81,29 +77,25 @@ unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
unsigned int nf_nat_ipv6_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct));
const struct nf_hook_state *state));
#endif /* _NF_NAT_L3PROTO_H */


@@ -34,12 +34,12 @@ struct nf_nat_l4proto {
*/
void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct);
int (*nlattr_to_range)(struct nlattr *tb[],
struct nf_nat_range *range);
struct nf_nat_range2 *range);
};
/* Protocol registration. */
@@ -72,11 +72,11 @@ bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct, u16 *rover);
int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range);
struct nf_nat_range2 *range);
#endif /*_NF_NAT_L4PROTO_H*/


@@ -7,7 +7,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
const struct nf_nat_ipv4_multi_range_compat *mr,
unsigned int hooknum);
unsigned int
nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
unsigned int hooknum);
#endif /* _NF_NAT_REDIRECT_H_ */


@@ -275,23 +275,6 @@ struct nft_set_estimate {
enum nft_set_class space;
};
/**
* struct nft_set_type - nf_tables set type
*
* @select_ops: function to select nft_set_ops
* @ops: default ops, used when no select_ops functions is present
* @list: used internally
* @owner: module reference
*/
struct nft_set_type {
const struct nft_set_ops *(*select_ops)(const struct nft_ctx *,
const struct nft_set_desc *desc,
u32 flags);
const struct nft_set_ops *ops;
struct list_head list;
struct module *owner;
};
struct nft_set_ext;
struct nft_expr;
@@ -310,7 +293,6 @@ struct nft_expr;
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
* @elemsize: element private size
* @features: features supported by the implementation
*/
struct nft_set_ops {
bool (*lookup)(const struct net *net,
@@ -361,10 +343,24 @@ struct nft_set_ops {
void (*destroy)(const struct nft_set *set);
unsigned int elemsize;
u32 features;
const struct nft_set_type *type;
};
/**
* struct nft_set_type - nf_tables set type
*
* @ops: set ops for this type
* @list: used internally
* @owner: module reference
* @features: features supported by the implementation
*/
struct nft_set_type {
const struct nft_set_ops ops;
struct list_head list;
struct module *owner;
u32 features;
};
#define to_set_type(o) container_of(o, struct nft_set_type, ops)
int nft_register_set(struct nft_set_type *type);
void nft_unregister_set(struct nft_set_type *type);
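
After this restructuring a set backend embeds its ops inside the type and
registers the type once; the core picks among registered types via the new
features field instead of calling a per-type select_ops, and to_set_type()
maps a chosen ops back to its type for module refcounting. A hypothetical
skeleton under the new layout (the name and lone stub callback are
stand-ins, not a real backend; a real one also fills privsize, estimate,
insert, remove and friends):

static bool ex_lookup(const struct net *net, const struct nft_set *set,
		      const u32 *key, const struct nft_set_ext **ext)
{
	return false;		/* toy backend: no elements, never matches */
}

static struct nft_set_type nft_example_set_type __read_mostly = {
	.owner		= THIS_MODULE,
	.features	= NFT_SET_MAP,	/* what this backend can service */
	.ops		= {
		.lookup	= ex_lookup,
	},
};

static int __init nft_example_set_init(void)
{
	return nft_register_set(&nft_example_set_type);
}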
@@ -589,7 +585,7 @@ static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
}
static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext)
{
return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
}
@@ -607,7 +603,7 @@ static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext)
static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
{
return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
}
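
These two hunks are the guts of item 5: expirations used to be stored in
32-bit jiffies, and relative comparisons such as
time_is_before_eq_jiffies() are only unambiguous within 2^31 ticks of the
current time, which at HZ=1000 is a little under 25 days (the ~23-day
ceiling mentioned in the message); storing a u64 and comparing against
jiffies64 lifts the cap. The old margin, as standalone arithmetic rather
than kernel code:

#include <stdio.h>

int main(void)
{
	const double hz = 1000.0;		/* a common CONFIG_HZ value */
	const double max_ticks = 2147483648.0;	/* 2^31 */

	/* longest future expiry a 32-bit jiffies comparison can handle */
	printf("%.1f days\n", max_ticks / hz / 86400.0);	/* ~24.9 */
	return 0;
}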
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@@ -1015,9 +1011,9 @@ static inline void *nft_obj_data(const struct nft_object *obj)
#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr))
struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
const struct nlattr *nla, u32 objtype,
u8 genmask);
struct nft_object *nft_obj_lookup(const struct nft_table *table,
const struct nlattr *nla, u32 objtype,
u8 genmask);
void nft_obj_notify(struct net *net, struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq,
@@ -1106,12 +1102,9 @@ struct nft_flowtable {
struct nf_flowtable data;
};
struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table,
const struct nlattr *nla,
u8 genmask);
void nft_flow_table_iterate(struct net *net,
void (*iter)(struct nf_flowtable *flowtable, void *data),
void *data);
struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
const struct nlattr *nla,
u8 genmask);
void nft_register_flowtable_type(struct nf_flowtable_type *type);
void nft_unregister_flowtable_type(struct nf_flowtable_type *type);


@@ -10,6 +10,9 @@ extern struct nft_expr_type nft_byteorder_type;
extern struct nft_expr_type nft_payload_type;
extern struct nft_expr_type nft_dynset_type;
extern struct nft_expr_type nft_range_type;
extern struct nft_expr_type nft_meta_type;
extern struct nft_expr_type nft_rt_type;
extern struct nft_expr_type nft_exthdr_type;
int nf_tables_core_module_init(void);
void nf_tables_core_module_exit(void);


@@ -1,18 +1 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _KER_NFNETLINK_LOG_H
#define _KER_NFNETLINK_LOG_H
void
nfulnl_log_packet(struct net *net,
u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct nf_loginfo *li_user,
const char *prefix);
#define NFULNL_COPY_DISABLED 0xff
#endif /* _KER_NFNETLINK_LOG_H */


@@ -1,44 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NFT_META_H_
#define _NFT_META_H_
struct nft_meta {
enum nft_meta_keys key:8;
union {
enum nft_registers dreg:8;
enum nft_registers sreg:8;
};
};
extern const struct nla_policy nft_meta_policy[];
int nft_meta_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[]);
int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[]);
int nft_meta_get_dump(struct sk_buff *skb,
const struct nft_expr *expr);
int nft_meta_set_dump(struct sk_buff *skb,
const struct nft_expr *expr);
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
void nft_meta_set_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr);
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data);
#endif


@@ -10,6 +10,7 @@
#define NF_NAT_RANGE_PROTO_RANDOM (1 << 2)
#define NF_NAT_RANGE_PERSISTENT (1 << 3)
#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
#define NF_NAT_RANGE_PROTO_OFFSET (1 << 5)
#define NF_NAT_RANGE_PROTO_RANDOM_ALL \
(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -17,7 +18,7 @@
#define NF_NAT_RANGE_MASK \
(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED | \
NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT | \
NF_NAT_RANGE_PROTO_RANDOM_FULLY)
NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET)
struct nf_nat_ipv4_range {
unsigned int flags;
@@ -40,4 +41,13 @@ struct nf_nat_range {
union nf_conntrack_man_proto max_proto;
};
struct nf_nat_range2 {
unsigned int flags;
union nf_inet_addr min_addr;
union nf_inet_addr max_addr;
union nf_conntrack_man_proto min_proto;
union nf_conntrack_man_proto max_proto;
union nf_conntrack_man_proto base_proto;
};
#endif /* _NETFILTER_NF_NAT_H */
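
struct nf_nat_range2 extends struct nf_nat_range only at the tail:
base_proto goes with the new NF_NAT_RANGE_PROTO_OFFSET flag and lets the
translated port be shifted relative to a base rather than picked from the
range. Code still holding the old struct can convert by copying the common
fields, something like this illustrative helper (the tree itself simply
moved call sites over to range2):

static void nf_nat_range_to_range2(const struct nf_nat_range *in,
				   struct nf_nat_range2 *out)
{
	memset(out, 0, sizeof(*out));	/* base_proto = 0: offset mode unused */
	out->flags	= in->flags;
	out->min_addr	= in->min_addr;
	out->max_addr	= in->max_addr;
	out->min_proto	= in->min_proto;
	out->max_proto	= in->max_proto;
}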


@@ -0,0 +1,90 @@
#ifndef _NF_OSF_H
#define _NF_OSF_H
#define MAXGENRELEN 32
#define NF_OSF_GENRE (1 << 0)
#define NF_OSF_TTL (1 << 1)
#define NF_OSF_LOG (1 << 2)
#define NF_OSF_INVERT (1 << 3)
#define NF_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */
#define NF_OSF_LOGLEVEL_FIRST 1 /* log only the first matched fingerprint */
#define NF_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */
#define NF_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */
/* Do not compare ip and fingerprint TTL at all */
#define NF_OSF_TTL_NOCHECK 2
/* Wildcard MSS (kind of).
* It is used to implement a state machine for the different wildcard values
* of the MSS and window sizes.
*/
struct nf_osf_wc {
__u32 wc;
__u32 val;
};
/* This struct represents IANA options
* http://www.iana.org/assignments/tcp-parameters
*/
struct nf_osf_opt {
__u16 kind, length;
struct nf_osf_wc wc;
};
struct nf_osf_info {
char genre[MAXGENRELEN];
__u32 len;
__u32 flags;
__u32 loglevel;
__u32 ttl;
};
struct nf_osf_user_finger {
struct nf_osf_wc wss;
__u8 ttl, df;
__u16 ss, mss;
__u16 opt_num;
char genre[MAXGENRELEN];
char version[MAXGENRELEN];
char subtype[MAXGENRELEN];
/* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
struct nf_osf_opt opt[MAX_IPOPTLEN];
};
struct nf_osf_finger {
struct rcu_head rcu_head;
struct list_head finger_entry;
struct nf_osf_user_finger finger;
};
struct nf_osf_nlmsg {
struct nf_osf_user_finger f;
struct iphdr ip;
struct tcphdr tcp;
};
/* Defines for IANA option kinds */
enum iana_options {
OSFOPT_EOL = 0, /* End of options */
OSFOPT_NOP, /* NOP */
OSFOPT_MSS, /* Maximum segment size */
OSFOPT_WSO, /* Window scale option */
OSFOPT_SACKP, /* SACK permitted */
OSFOPT_SACK, /* SACK */
OSFOPT_ECHO,
OSFOPT_ECHOREPLY,
OSFOPT_TS, /* Timestamp option */
OSFOPT_POCP, /* Partial Order Connection Permitted */
OSFOPT_POSP, /* Partial Order Service Profile */
/* Others are not used in the current OSF */
OSFOPT_EMPTY = 255,
};
#endif /* _NF_OSF_H */


@@ -831,7 +831,9 @@ enum nft_rt_keys {
NFT_RT_NEXTHOP4,
NFT_RT_NEXTHOP6,
NFT_RT_TCPMSS,
__NFT_RT_MAX
};
#define NFT_RT_MAX (__NFT_RT_MAX - 1)
/**
* enum nft_hash_types - nf_tables hash expression types
@@ -949,7 +951,9 @@ enum nft_ct_keys {
NFT_CT_DST_IP,
NFT_CT_SRC_IP6,
NFT_CT_DST_IP6,
__NFT_CT_MAX
};
#define NFT_CT_MAX (__NFT_CT_MAX - 1)
/**
* enum nft_ct_attributes - nf_tables ct expression netlink attributes
@@ -1450,6 +1454,8 @@ enum nft_trace_types {
* @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
* @NFTA_NG_TYPE: operation type (NLA_U32)
* @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
* @NFTA_NG_SET_NAME: name of the map to lookup (NLA_STRING)
* @NFTA_NG_SET_ID: id of the map (NLA_U32)
*/
enum nft_ng_attributes {
NFTA_NG_UNSPEC,
@@ -1457,6 +1463,8 @@ enum nft_ng_attributes {
NFTA_NG_MODULUS,
NFTA_NG_TYPE,
NFTA_NG_OFFSET,
NFTA_NG_SET_NAME,
NFTA_NG_SET_ID,
__NFTA_NG_MAX
};
#define NFTA_NG_MAX (__NFTA_NG_MAX - 1)
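
Kernel-side, these attributes (item 7) let the generator feed its output
through a map: eval computes the counter or random value, uses it as the
lookup key, and writes the bound data to the destination register, enabling
rules in the spirit of "numgen inc mod 2 map { ... }". A simplified sketch
following the shape of nft_numgen.c (names abbreviated):

static void nft_ng_inc_map_eval(const struct nft_expr *expr,
				struct nft_regs *regs,
				const struct nft_pktinfo *pkt)
{
	struct nft_ng_inc *priv = nft_expr_priv(expr);
	const struct nft_set *map = priv->map;
	const struct nft_set_ext *ext;
	u32 result;

	result = nft_ng_inc_gen(priv);	/* counter % modulus + offset */
	if (map->ops->lookup(nft_net(pkt), map, &result, &ext))
		nft_data_copy(&regs->data[priv->dreg],
			      nft_set_ext_data(ext), map->dlen);
}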


@@ -262,6 +262,7 @@ enum ctattr_stats_cpu {
enum ctattr_stats_global {
CTA_STATS_GLOBAL_UNSPEC,
CTA_STATS_GLOBAL_ENTRIES,
CTA_STATS_GLOBAL_MAX_ENTRIES,
__CTA_STATS_GLOBAL_MAX,
};
#define CTA_STATS_GLOBAL_MAX (__CTA_STATS_GLOBAL_MAX - 1)
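
Exporting the limit (item 11) then only needs the dump side to emit it next
to the existing entry count; roughly, in ctnetlink's global stats fill
function:

	unsigned int nr_conntracks = atomic_read(&net->ct.count);

	if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)) ||
	    nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES,
			 htonl(nf_conntrack_max)))
		goto nla_put_failure;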


@@ -23,101 +23,29 @@
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/netfilter/nf_osf.h>
#define MAXGENRELEN 32
#define XT_OSF_GENRE NF_OSF_GENRE
#define XT_OSF_INVERT NF_OSF_INVERT
#define XT_OSF_GENRE (1<<0)
#define XT_OSF_TTL (1<<1)
#define XT_OSF_LOG (1<<2)
#define XT_OSF_INVERT (1<<3)
#define XT_OSF_TTL NF_OSF_TTL
#define XT_OSF_LOG NF_OSF_LOG
#define XT_OSF_LOGLEVEL_ALL 0 /* log all matched fingerprints */
#define XT_OSF_LOGLEVEL_FIRST 1 /* log only the first matched fingerprint */
#define XT_OSF_LOGLEVEL_ALL_KNOWN 2 /* do not log unknown packets */
#define XT_OSF_LOGLEVEL_ALL NF_OSF_LOGLEVEL_ALL
#define XT_OSF_LOGLEVEL_FIRST NF_OSF_LOGLEVEL_FIRST
#define XT_OSF_LOGLEVEL_ALL_KNOWN NF_OSF_LOGLEVEL_ALL_KNOWN
#define XT_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */
#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */
#define XT_OSF_TTL_NOCHECK 2 /* Do not compare ip and fingerprint TTL at all */
#define XT_OSF_TTL_TRUE NF_OSF_TTL_TRUE
#define XT_OSF_TTL_NOCHECK NF_OSF_TTL_NOCHECK
struct xt_osf_info {
char genre[MAXGENRELEN];
__u32 len;
__u32 flags;
__u32 loglevel;
__u32 ttl;
};
#define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */
/*
* Wildcard MSS (kind of).
* It is used to implement a state machine for the different wildcard values
* of the MSS and window sizes.
*/
struct xt_osf_wc {
__u32 wc;
__u32 val;
};
/*
* This struct represents IANA options
* http://www.iana.org/assignments/tcp-parameters
*/
struct xt_osf_opt {
__u16 kind, length;
struct xt_osf_wc wc;
};
struct xt_osf_user_finger {
struct xt_osf_wc wss;
__u8 ttl, df;
__u16 ss, mss;
__u16 opt_num;
char genre[MAXGENRELEN];
char version[MAXGENRELEN];
char subtype[MAXGENRELEN];
/* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
struct xt_osf_opt opt[MAX_IPOPTLEN];
};
struct xt_osf_nlmsg {
struct xt_osf_user_finger f;
struct iphdr ip;
struct tcphdr tcp;
};
/* Defines for IANA option kinds */
enum iana_options {
OSFOPT_EOL = 0, /* End of options */
OSFOPT_NOP, /* NOP */
OSFOPT_MSS, /* Maximum segment size */
OSFOPT_WSO, /* Window scale option */
OSFOPT_SACKP, /* SACK permitted */
OSFOPT_SACK, /* SACK */
OSFOPT_ECHO,
OSFOPT_ECHOREPLY,
OSFOPT_TS, /* Timestamp option */
OSFOPT_POCP, /* Partial Order Connection Permitted */
OSFOPT_POSP, /* Partial Order Service Profile */
/* Others are not used in the current OSF */
OSFOPT_EMPTY = 255,
};
/*
* Initial window size option state machine: multiple of mss, mtu or
* plain numeric value. Can also be made as plain numeric value which
* is not a multiple of specified value.
*/
enum xt_osf_window_size_options {
OSF_WSS_PLAIN = 0,
OSF_WSS_MSS,
OSF_WSS_MTU,
OSF_WSS_MODULO,
OSF_WSS_MAX,
};
#define xt_osf_wc nf_osf_wc
#define xt_osf_opt nf_osf_opt
#define xt_osf_info nf_osf_info
#define xt_osf_user_finger nf_osf_user_finger
#define xt_osf_finger nf_osf_finger
#define xt_osf_nlmsg nf_osf_nlmsg
/*
* Add/remove fingerprint from the kernel.


@@ -191,6 +191,12 @@ struct ebt_entry {
unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace))));
};
static __inline__ struct ebt_entry_target *
ebt_get_target(struct ebt_entry *e)
{
return (void *)e + e->target_offset;
}
/* {g,s}etsockopt numbers */
#define EBT_BASE_CTL 128


@@ -17,7 +17,10 @@
#define IP6T_SRH_LAST_GT 0x0100
#define IP6T_SRH_LAST_LT 0x0200
#define IP6T_SRH_TAG 0x0400
#define IP6T_SRH_MASK 0x07FF
#define IP6T_SRH_PSID 0x0800
#define IP6T_SRH_NSID 0x1000
#define IP6T_SRH_LSID 0x2000
#define IP6T_SRH_MASK 0x3FFF
/* Values for "mt_invflags" field in struct ip6t_srh */
#define IP6T_SRH_INV_NEXTHDR 0x0001
@@ -31,7 +34,10 @@
#define IP6T_SRH_INV_LAST_GT 0x0100
#define IP6T_SRH_INV_LAST_LT 0x0200
#define IP6T_SRH_INV_TAG 0x0400
#define IP6T_SRH_INV_MASK 0x07FF
#define IP6T_SRH_INV_PSID 0x0800
#define IP6T_SRH_INV_NSID 0x1000
#define IP6T_SRH_INV_LSID 0x2000
#define IP6T_SRH_INV_MASK 0x3FFF
/**
* struct ip6t_srh - SRH match options
@@ -54,4 +60,37 @@ struct ip6t_srh {
__u16 mt_invflags;
};
/**
* struct ip6t_srh1 - SRH match options (revision 1)
* @ next_hdr: Next header field of SRH
* @ hdr_len: Extension header length field of SRH
* @ segs_left: Segments left field of SRH
* @ last_entry: Last entry field of SRH
* @ tag: Tag field of SRH
* @ psid_addr: Address of previous SID in SRH SID list
* @ nsid_addr: Address of NEXT SID in SRH SID list
* @ lsid_addr: Address of LAST SID in SRH SID list
* @ psid_msk: Mask of previous SID in SRH SID list
* @ nsid_msk: Mask of next SID in SRH SID list
* @ lsid_msk: Mask of last SID in SRH SID list
* @ mt_flags: match options
* @ mt_invflags: Invert the sense of match options
*/
struct ip6t_srh1 {
__u8 next_hdr;
__u8 hdr_len;
__u8 segs_left;
__u8 last_entry;
__u16 tag;
struct in6_addr psid_addr;
struct in6_addr nsid_addr;
struct in6_addr lsid_addr;
struct in6_addr psid_msk;
struct in6_addr nsid_msk;
struct in6_addr lsid_msk;
__u16 mt_flags;
__u16 mt_invflags;
};
#endif /*_IP6T_SRH_H*/


@@ -8,13 +8,6 @@ menuconfig NF_TABLES_BRIDGE
bool "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
config NFT_BRIDGE_META
tristate "Netfilter nf_table bridge meta support"
depends on NFT_META
help
Add support for bridge dedicated meta key.
config NFT_BRIDGE_REJECT
tristate "Netfilter nf_tables bridge reject support"
depends on NFT_REJECT && NFT_REJECT_IPV4 && NFT_REJECT_IPV6


@@ -3,7 +3,6 @@
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
# packet logging


@@ -101,7 +101,7 @@ ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb,
{
par->match = m->u.match;
par->matchinfo = m->data;
return m->u.match->match(skb, par) ? EBT_MATCH : EBT_NOMATCH;
return !m->u.match->match(skb, par);
}
static inline int
@@ -177,6 +177,12 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
return (void *)entry + entry->next_offset;
}
static inline const struct ebt_entry_target *
ebt_get_target_c(const struct ebt_entry *e)
{
return ebt_get_target((struct ebt_entry *)e);
}
/* Do some firewalling */
unsigned int ebt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
@@ -230,8 +236,7 @@ unsigned int ebt_do_table(struct sk_buff *skb,
*/
EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
t = (struct ebt_entry_target *)
(((char *)point) + point->target_offset);
t = ebt_get_target_c(point);
/* standard target */
if (!t->u.target->target)
verdict = ((struct ebt_standard_target *)t)->verdict;
@@ -343,6 +348,16 @@ find_table_lock(struct net *net, const char *name, int *error,
"ebtable_", error, mutex);
}
static inline void ebt_free_table_info(struct ebt_table_info *info)
{
int i;
if (info->chainstack) {
for_each_possible_cpu(i)
vfree(info->chainstack[i]);
vfree(info->chainstack);
}
}
static inline int
ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
unsigned int *cnt)
@@ -627,7 +642,7 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt)
return 1;
EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL);
EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL);
t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
t = ebt_get_target(e);
par.net = net;
par.target = t->u.target;
@@ -706,7 +721,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j);
if (ret != 0)
goto cleanup_watchers;
t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
t = ebt_get_target(e);
gap = e->next_offset - e->target_offset;
target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0);
@@ -779,8 +794,7 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack
if (pos == nentries)
continue;
}
t = (struct ebt_entry_target *)
(((char *)e) + e->target_offset);
t = ebt_get_target_c(e);
if (strcmp(t->u.name, EBT_STANDARD_TARGET))
goto letscontinue;
if (e->target_offset + sizeof(struct ebt_standard_target) >
@@ -975,7 +989,7 @@ static void get_counters(const struct ebt_counter *oldcounters,
static int do_replace_finish(struct net *net, struct ebt_replace *repl,
struct ebt_table_info *newinfo)
{
int ret, i;
int ret;
struct ebt_counter *counterstmp = NULL;
/* used to be able to unlock earlier */
struct ebt_table_info *table;
@@ -1051,13 +1065,8 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
ebt_cleanup_entry, net, NULL);
vfree(table->entries);
if (table->chainstack) {
for_each_possible_cpu(i)
vfree(table->chainstack[i]);
vfree(table->chainstack);
}
ebt_free_table_info(table);
vfree(table);
vfree(counterstmp);
#ifdef CONFIG_AUDIT
@@ -1078,11 +1087,7 @@ free_iterate:
free_counterstmp:
vfree(counterstmp);
/* can be initialized in translate_table() */
if (newinfo->chainstack) {
for_each_possible_cpu(i)
vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack);
}
ebt_free_table_info(newinfo);
return ret;
}
@@ -1147,8 +1152,6 @@ free_newinfo:
static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
{
int i;
mutex_lock(&ebt_mutex);
list_del(&table->list);
mutex_unlock(&ebt_mutex);
@@ -1157,11 +1160,7 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
if (table->private->nentries)
module_put(table->me);
vfree(table->private->entries);
if (table->private->chainstack) {
for_each_possible_cpu(i)
vfree(table->private->chainstack[i]);
vfree(table->private->chainstack);
}
ebt_free_table_info(table->private);
vfree(table->private);
kfree(table);
}
@@ -1263,11 +1262,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
free_unlock:
mutex_unlock(&ebt_mutex);
free_chainstack:
if (newinfo->chainstack) {
for_each_possible_cpu(i)
vfree(newinfo->chainstack[i]);
vfree(newinfo->chainstack);
}
ebt_free_table_info(newinfo);
vfree(newinfo->entries);
free_newinfo:
vfree(newinfo);
@@ -1405,7 +1400,7 @@ static inline int ebt_entry_to_user(struct ebt_entry *e, const char *base,
return -EFAULT;
hlp = ubase + (((char *)e + e->target_offset) - base);
t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
t = ebt_get_target_c(e);
ret = EBT_MATCH_ITERATE(e, ebt_match_to_user, base, ubase);
if (ret != 0)
@@ -1746,7 +1741,7 @@ static int compat_copy_entry_to_user(struct ebt_entry *e, void __user **dstptr,
return ret;
target_offset = e->target_offset - (origsize - *size);
t = (struct ebt_entry_target *) ((char *) e + e->target_offset);
t = ebt_get_target(e);
ret = compat_target_to_user(t, dstptr, size);
if (ret)
@@ -1794,7 +1789,7 @@ static int compat_calc_entry(const struct ebt_entry *e,
EBT_MATCH_ITERATE(e, compat_calc_match, &off);
EBT_WATCHER_ITERATE(e, compat_calc_watcher, &off);
t = (const struct ebt_entry_target *) ((char *) e + e->target_offset);
t = ebt_get_target_c(e);
off += xt_compat_target_offset(t->u.target);
off += ebt_compat_entry_padsize();


@@ -1,135 +0,0 @@
/*
* Copyright (c) 2014 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_meta.h>
#include "../br_private.h"
static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
u32 *dest = &regs->data[priv->dreg];
const struct net_bridge_port *p;
switch (priv->key) {
case NFT_META_BRI_IIFNAME:
if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
goto err;
break;
case NFT_META_BRI_OIFNAME:
if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
goto err;
break;
default:
goto out;
}
strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
return;
out:
return nft_meta_get_eval(expr, regs, pkt);
err:
regs->verdict.code = NFT_BREAK;
}
static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int len;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
case NFT_META_BRI_IIFNAME:
case NFT_META_BRI_OIFNAME:
len = IFNAMSIZ;
break;
default:
return nft_meta_get_init(ctx, expr, tb);
}
priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, len);
}
static struct nft_expr_type nft_meta_bridge_type;
static const struct nft_expr_ops nft_meta_bridge_get_ops = {
.type = &nft_meta_bridge_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_bridge_get_eval,
.init = nft_meta_bridge_get_init,
.dump = nft_meta_get_dump,
};
static const struct nft_expr_ops nft_meta_bridge_set_ops = {
.type = &nft_meta_bridge_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
.validate = nft_meta_set_validate,
};
static const struct nft_expr_ops *
nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
if (tb[NFTA_META_KEY] == NULL)
return ERR_PTR(-EINVAL);
if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
return ERR_PTR(-EINVAL);
if (tb[NFTA_META_DREG])
return &nft_meta_bridge_get_ops;
if (tb[NFTA_META_SREG])
return &nft_meta_bridge_set_ops;
return ERR_PTR(-EINVAL);
}
static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
.family = NFPROTO_BRIDGE,
.name = "meta",
.select_ops = nft_meta_bridge_select_ops,
.policy = nft_meta_policy,
.maxattr = NFTA_META_MAX,
.owner = THIS_MODULE,
};
static int __init nft_meta_bridge_module_init(void)
{
return nft_register_expr(&nft_meta_bridge_type);
}
static void __exit nft_meta_bridge_module_exit(void)
{
nft_unregister_expr(&nft_meta_bridge_type);
}
module_init(nft_meta_bridge_module_init);
module_exit(nft_meta_bridge_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");


@@ -300,7 +300,7 @@ ipt_do_table(struct sk_buff *skb,
counter = xt_get_this_cpu_counter(&e->counters);
ADD_COUNTER(*counter, skb->len, 1);
t = ipt_get_target(e);
t = ipt_get_target_c(e);
WARN_ON(!t->u.kernel.target);
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)


@@ -47,7 +47,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
static unsigned int
masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
struct nf_nat_range range;
struct nf_nat_range2 range;
const struct nf_nat_ipv4_multi_range_compat *mr;
mr = par->targinfo;


@@ -33,8 +33,7 @@ static const struct xt_table nf_nat_ipv4_table = {
static unsigned int iptable_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
const struct nf_hook_state *state)
{
return ipt_do_table(skb, state, state->net->ipv4.nat_table);
}


@@ -2,265 +2,12 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
struct tcphdr *tcph;
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
skb_try_make_writable(skb, thoff + sizeof(*tcph)))
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
return 0;
}
static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
struct udphdr *udph;
if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
skb_try_make_writable(skb, thoff + sizeof(*udph)))
return -1;
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
new_addr, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
return 0;
}
static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
unsigned int thoff, __be32 addr,
__be32 new_addr)
{
switch (iph->protocol) {
case IPPROTO_TCP:
if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
case IPPROTO_UDP:
if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
}
return 0;
}
static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
struct iphdr *iph, unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = iph->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = iph->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
default:
return -1;
}
csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
struct iphdr *iph, unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = iph->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = iph->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
default:
return -1;
}
csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
enum flow_offload_tuple_dir dir)
{
struct iphdr *iph = ip_hdr(skb);
unsigned int thoff = iph->ihl * 4;
if (flow->flags & FLOW_OFFLOAD_SNAT &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
if (flow->flags & FLOW_OFFLOAD_DNAT &&
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
return 0;
}
static bool ip_has_options(unsigned int thoff)
{
return thoff != sizeof(struct iphdr);
}
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
if (!pskb_may_pull(skb, sizeof(*iph)))
return -1;
iph = ip_hdr(skb);
thoff = iph->ihl * 4;
if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;
if (iph->protocol != IPPROTO_TCP &&
iph->protocol != IPPROTO_UDP)
return -1;
thoff = iph->ihl * 4;
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
tuple->src_port = ports->source;
tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET;
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
return 0;
}
/* Based on ip_exceeds_mtu(). */
static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
return false;
if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
}
static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rtable *rt)
{
u32 mtu;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (__nf_flow_exceeds_mtu(skb, mtu))
return true;
return false;
}
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
const struct rtable *rt;
struct iphdr *iph;
__be32 nexthop;
if (skb->protocol != htons(ETH_P_IP))
return NF_ACCEPT;
if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
if (tuplehash == NULL)
return NF_ACCEPT;
outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
if (!outdev)
return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*iph)))
return NF_DROP;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ip(flow, skb, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
iph = ip_hdr(skb);
ip_decrease_ttl(iph);
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
.init = nf_flow_table_init,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,


@@ -395,7 +395,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_q931_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
struct nf_nat_range range;
struct nf_nat_range2 range;
if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
nf_nat_follow_master(new, this);
@@ -497,7 +497,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_callforwarding_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
struct nf_nat_range range;
struct nf_nat_range2 range;
/* This must be a fresh one. */
BUG_ON(new->status & IPS_NAT_DONE_MASK);


@@ -63,7 +63,7 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
#endif /* CONFIG_XFRM */
static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
const struct nf_nat_range *range)
const struct nf_nat_range2 *range)
{
return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
@@ -143,7 +143,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
struct nf_nat_range2 *range)
{
if (tb[CTA_NAT_V4_MINIP]) {
range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
@@ -246,8 +246,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -285,7 +284,7 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
ret = do_chain(priv, skb, state, ct);
ret = do_chain(priv, skb, state);
if (ret != NF_ACCEPT)
return ret;
@@ -326,8 +325,7 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
@@ -346,8 +344,7 @@ nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
@@ -383,8 +380,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;


@@ -24,13 +24,13 @@
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
const struct net_device *out)
{
struct nf_conn *ct;
struct nf_conn_nat *nat;
enum ip_conntrack_info ctinfo;
struct nf_nat_range newrange;
struct nf_nat_range2 newrange;
const struct rtable *rt;
__be32 newsrc, nh;


@@ -48,7 +48,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_tuple t = {};
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
struct nf_nat_range range;
struct nf_nat_range2 range;
struct nf_conn_nat *nat;
nat = nf_ct_nat_ext_add(ct);


@@ -41,7 +41,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
static void
gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{


@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
static void
icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{


@@ -28,8 +28,7 @@
static unsigned int nft_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;


@@ -21,7 +21,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_masq *priv = nft_expr_priv(expr);
struct nf_nat_range range;
struct nf_nat_range2 range;
memset(&range, 0, sizeof(range));
range.flags = priv->flags;


@@ -383,28 +383,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
if (dst_metric_locked(dst, RTAX_MTU)) {
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
return mtu;
}
mtu = IPV6_MIN_MTU;
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
if (idev)
mtu = idev->cnf.mtu6;
rcu_read_unlock();
return mtu;
}
EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)


@@ -528,7 +528,6 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
.family = NFPROTO_IPV6,
};
t = ip6t_get_target(e);
return xt_check_target(&par, t->u.target_size - sizeof(*t),
e->ipv6.proto,
e->ipv6.invflags & IP6T_INV_PROTO);


@@ -29,7 +29,7 @@ masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
{
const struct nf_nat_range *range = par->targinfo;
const struct nf_nat_range2 *range = par->targinfo;
if (range->flags & NF_NAT_RANGE_MAP_IPS)
return -EINVAL;


@@ -117,6 +117,130 @@ static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return true;
}
static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par)
{
int hdrlen, psidoff, nsidoff, lsidoff, srhoff = 0;
const struct ip6t_srh1 *srhinfo = par->matchinfo;
struct in6_addr *psid, *nsid, *lsid;
struct in6_addr _psid, _nsid, _lsid;
struct ipv6_sr_hdr *srh;
struct ipv6_sr_hdr _srh;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return false;
srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
if (!srh)
return false;
hdrlen = ipv6_optlen(srh);
if (skb->len - srhoff < hdrlen)
return false;
if (srh->type != IPV6_SRCRT_TYPE_4)
return false;
if (srh->segments_left > srh->first_segment)
return false;
/* Next Header matching */
if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
!(srh->nexthdr == srhinfo->next_hdr)))
return false;
/* Header Extension Length matching */
if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
!(srh->hdrlen == srhinfo->hdr_len)))
return false;
if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
!(srh->hdrlen > srhinfo->hdr_len)))
return false;
if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
!(srh->hdrlen < srhinfo->hdr_len)))
return false;
/* Segments Left matching */
if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
!(srh->segments_left == srhinfo->segs_left)))
return false;
if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
!(srh->segments_left > srhinfo->segs_left)))
return false;
if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
!(srh->segments_left < srhinfo->segs_left)))
return false;
/**
* Last Entry matching
* Last_Entry field was introduced in revision 6 of the SRH draft.
* It was called First_Segment in the previous revision
*/
if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
!(srh->first_segment == srhinfo->last_entry)))
return false;
if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
!(srh->first_segment > srhinfo->last_entry)))
return false;
if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
!(srh->first_segment < srhinfo->last_entry)))
return false;
/**
* Tag matching
* Tag field was introduced in revision 6 of the SRH draft
*/
if (srhinfo->mt_flags & IP6T_SRH_TAG)
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
!(srh->tag == srhinfo->tag)))
return false;
/* Previous SID matching */
if (srhinfo->mt_flags & IP6T_SRH_PSID) {
if (srh->segments_left == srh->first_segment)
return false;
psidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left + 1) * sizeof(struct in6_addr));
psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid);
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID,
ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk,
&srhinfo->psid_addr)))
return false;
}
/* Next SID matching */
if (srhinfo->mt_flags & IP6T_SRH_NSID) {
if (srh->segments_left == 0)
return false;
nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) +
((srh->segments_left - 1) * sizeof(struct in6_addr));
nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid);
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID,
ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk,
&srhinfo->nsid_addr)))
return false;
}
/* Last SID matching */
if (srhinfo->mt_flags & IP6T_SRH_LSID) {
lsidoff = srhoff + sizeof(struct ipv6_sr_hdr);
lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid);
if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID,
ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk,
&srhinfo->lsid_addr)))
return false;
}
return true;
}
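
The offset math in srh1_mt6() follows the SRv6 encoding: the SID list
starts right after the fixed SRH header and is stored in reverse path
order, so list[0] is the last SID and list[segments_left] is the active
one. For example, with first_segment = 3 and segments_left = 2, the
previous SID is list[3], the next SID is list[1], and the last SID is
list[0]; in code form:

/* byte offset of SID list[idx], given the SRH offset in the packet */
static unsigned int sid_offset(unsigned int srhoff, unsigned int idx)
{
	return srhoff + sizeof(struct ipv6_sr_hdr) +
	       idx * sizeof(struct in6_addr);
}

/*
 * segments_left == 2, first_segment == 3:
 *   previous SID = list[segments_left + 1] = list[3]  (already visited)
 *   next SID     = list[segments_left - 1] = list[1]  (visited next)
 *   last SID     = list[0]                            (final segment)
 */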
static int srh_mt6_check(const struct xt_mtchk_param *par)
{
const struct ip6t_srh *srhinfo = par->matchinfo;
@@ -136,23 +260,54 @@ static int srh_mt6_check(const struct xt_mtchk_param *par)
return 0;
}
static struct xt_match srh_mt6_reg __read_mostly = {
.name = "srh",
.family = NFPROTO_IPV6,
.match = srh_mt6,
.matchsize = sizeof(struct ip6t_srh),
.checkentry = srh_mt6_check,
.me = THIS_MODULE,
static int srh1_mt6_check(const struct xt_mtchk_param *par)
{
const struct ip6t_srh1 *srhinfo = par->matchinfo;
if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
pr_info_ratelimited("unknown srh match flags %X\n",
srhinfo->mt_flags);
return -EINVAL;
}
if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
pr_info_ratelimited("unknown srh invflags %X\n",
srhinfo->mt_invflags);
return -EINVAL;
}
return 0;
}
static struct xt_match srh_mt6_reg[] __read_mostly = {
{
.name = "srh",
.revision = 0,
.family = NFPROTO_IPV6,
.match = srh_mt6,
.matchsize = sizeof(struct ip6t_srh),
.checkentry = srh_mt6_check,
.me = THIS_MODULE,
},
{
.name = "srh",
.revision = 1,
.family = NFPROTO_IPV6,
.match = srh1_mt6,
.matchsize = sizeof(struct ip6t_srh1),
.checkentry = srh1_mt6_check,
.me = THIS_MODULE,
}
};
static int __init srh_mt6_init(void)
{
return xt_register_match(&srh_mt6_reg);
return xt_register_matches(srh_mt6_reg, ARRAY_SIZE(srh_mt6_reg));
}
static void __exit srh_mt6_exit(void)
{
xt_unregister_match(&srh_mt6_reg);
xt_unregister_matches(srh_mt6_reg, ARRAY_SIZE(srh_mt6_reg));
}
module_init(srh_mt6_init);


@@ -35,8 +35,7 @@ static const struct xt_table nf_nat_ipv6_table = {
static unsigned int ip6table_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
const struct nf_hook_state *state)
{
return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
}


@@ -3,256 +3,12 @@
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
struct in6_addr *addr,
struct in6_addr *new_addr)
{
struct tcphdr *tcph;
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
skb_try_make_writable(skb, thoff + sizeof(*tcph)))
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
new_addr->s6_addr32, true);
return 0;
}
static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
struct in6_addr *addr,
struct in6_addr *new_addr)
{
struct udphdr *udph;
if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
skb_try_make_writable(skb, thoff + sizeof(*udph)))
return -1;
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
new_addr->s6_addr32, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
return 0;
}
static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff, struct in6_addr *addr,
struct in6_addr *new_addr)
{
switch (ip6h->nexthdr) {
case IPPROTO_TCP:
if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
case IPPROTO_UDP:
if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
}
return 0;
}
static int nf_flow_snat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = ip6h->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = ip6h->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
ip6h->daddr = new_addr;
break;
default:
return -1;
}
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = ip6h->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
ip6h->daddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = ip6h->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
default:
return -1;
}
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb,
enum flow_offload_tuple_dir dir)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
unsigned int thoff = sizeof(*ip6h);
if (flow->flags & FLOW_OFFLOAD_SNAT &&
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
return -1;
if (flow->flags & FLOW_OFFLOAD_DNAT &&
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
return -1;
return 0;
}
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
if (!pskb_may_pull(skb, sizeof(*ip6h)))
return -1;
ip6h = ipv6_hdr(skb);
if (ip6h->nexthdr != IPPROTO_TCP &&
ip6h->nexthdr != IPPROTO_UDP)
return -1;
thoff = sizeof(*ip6h);
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
tuple->src_port = ports->source;
tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET6;
tuple->l4proto = ip6h->nexthdr;
tuple->iifidx = dev->ifindex;
return 0;
}
/* Based on ip_exceeds_mtu(). */
static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
}
static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
{
u32 mtu;
mtu = ip6_dst_mtu_forward(&rt->dst);
if (__nf_flow_exceeds_mtu(skb, mtu))
return true;
return false;
}
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
struct in6_addr *nexthop;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
if (skb->protocol != htons(ETH_P_IPV6))
return NF_ACCEPT;
if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
if (tuplehash == NULL)
return NF_ACCEPT;
outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
if (!outdev)
return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ipv6(flow, skb, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
ip6h = ipv6_hdr(skb);
ip6h->hop_limit--;
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
.init = nf_flow_table_init,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,

View File

@ -62,7 +62,7 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
#endif
static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
const struct nf_nat_range *range)
const struct nf_nat_range2 *range)
{
return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
@ -151,7 +151,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
struct nf_nat_range2 *range)
{
if (tb[CTA_NAT_V6_MINIP]) {
nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
@ -257,8 +257,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@ -303,7 +302,7 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
ret = do_chain(priv, skb, state, ct);
ret = do_chain(priv, skb, state);
if (ret != NF_ACCEPT)
return ret;
@ -343,8 +342,7 @@ nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
unsigned int ret;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
@ -363,8 +361,7 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
@ -400,8 +397,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
const struct nf_hook_state *state))
{
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;

View File

@ -26,14 +26,14 @@
static atomic_t v6_worker_count;
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
const struct net_device *out)
{
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
struct in6_addr src;
struct nf_conn *ct;
struct nf_nat_range newrange;
struct nf_nat_range2 newrange;
ct = nf_ct_get(skb, &ctinfo);
WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||

View File

@ -32,7 +32,7 @@ icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
static void
icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{

View File

@ -26,8 +26,7 @@
static unsigned int nft_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;

View File

@ -22,7 +22,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_masq *priv = nft_expr_priv(expr);
struct nf_nat_range range;
struct nf_nat_range2 range;
memset(&range, 0, sizeof(range));
range.flags = priv->flags;

View File

@ -22,7 +22,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_redir *priv = nft_expr_priv(expr);
struct nf_nat_range range;
struct nf_nat_range2 range;
memset(&range, 0, sizeof(range));
if (priv->sreg_proto_min) {

View File

@ -444,6 +444,9 @@ config NETFILTER_SYNPROXY
endif # NF_CONNTRACK
config NF_OSF
tristate 'Passive OS fingerprint infrastructure'
config NF_TABLES
select NETFILTER_NETLINK
tristate "Netfilter nf_tables support"
@ -474,24 +477,6 @@ config NF_TABLES_NETDEV
help
This option enables support for the "netdev" table.
config NFT_EXTHDR
tristate "Netfilter nf_tables exthdr module"
help
This option adds the "exthdr" expression that you can use to match
IPv6 extension headers and tcp options.
config NFT_META
tristate "Netfilter nf_tables meta module"
help
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
config NFT_RT
tristate "Netfilter nf_tables routing module"
help
This option adds the "rt" expression that you can use to match
packet routing information such as the packet nexthop.
config NFT_NUMGEN
tristate "Netfilter nf_tables number generator module"
help
@ -667,8 +652,7 @@ endif # NF_TABLES
config NF_FLOW_TABLE_INET
tristate "Netfilter flow table mixed IPv4/IPv6 module"
depends on NF_FLOW_TABLE_IPV4
depends on NF_FLOW_TABLE_IPV6
depends on NF_FLOW_TABLE
help
This option adds the flow table mixed IPv4/IPv6 support.
@ -1378,6 +1362,7 @@ config NETFILTER_XT_MATCH_NFACCT
config NETFILTER_XT_MATCH_OSF
tristate '"osf" Passive OS fingerprint match'
depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
select NF_OSF
help
This option selects the Passive OS Fingerprinting match module
that allows one to passively match the remote operating system by

View File

@ -76,13 +76,10 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
obj-$(CONFIG_NFT_META) += nft_meta.o
obj-$(CONFIG_NFT_RT) += nft_rt.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
@ -104,6 +101,7 @@ obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
obj-$(CONFIG_NF_OSF) += nf_osf.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
@ -111,6 +109,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
# flow table infrastructure
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
# generic X tables

View File

@ -225,6 +225,25 @@ config IP_VS_SH
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
config IP_VS_MH
tristate "maglev hashing scheduling"
---help---
The maglev consistent hashing scheduling algorithm provides
Google's Maglev hashing algorithm as an IPVS scheduler. It assigns
network connections to the servers by looking up a statically
assigned hash table called the lookup table. Maglev hashing assigns
a preference list of all the lookup table positions to each
destination.
Through this operation, Maglev hashing gives an almost equal
share of the lookup table to each of the destinations and provides
minimal disruption when the set of destinations changes: a
connection will likely be sent to the same destination as it was
before.
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
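As a rough illustration, the population step described above can be sketched in a few lines of userspace C. Everything here is hypothetical: toy offset/skip values stand in for the two per-destination hashes, and every destination gets one turn per round (equal weights); this is not the kernel implementation below.

/* Toy Maglev table population; illustrative only. */
#include <stdio.h>
#include <string.h>

#define TAB_SIZE 7	/* must be prime so every walk visits all slots */
#define NDEST	 2

int main(void)
{
	/* offset/skip would normally come from two hashes of the dest */
	unsigned int offset[NDEST] = { 3, 0 };
	unsigned int skip[NDEST]   = { 4, 2 };	/* in 1..TAB_SIZE-1 */
	unsigned int perm[NDEST];
	int table[TAB_SIZE];
	int filled = 0, d = 0;

	memcpy(perm, offset, sizeof(perm));
	memset(table, -1, sizeof(table));	/* -1: slot still free */

	while (filled < TAB_SIZE) {
		unsigned int c = perm[d];

		while (table[c] >= 0) {		/* slot taken: advance */
			perm[d] = (perm[d] + skip[d]) % TAB_SIZE;
			c = perm[d];
		}
		table[c] = d;
		filled++;
		d = (d + 1) % NDEST;		/* one turn per dest */
	}
	for (int i = 0; i < TAB_SIZE; i++)
		printf("slot %d -> dest %d\n", i, table[i]);
	return 0;
}

Because the table size is prime, each destination's (offset, skip) walk visits every slot, so the outer loop always terminates with a full table; this is why the scheduler below only offers prime table sizes.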
config IP_VS_SED
tristate "shortest expected delay scheduling"
---help---
@ -266,6 +285,24 @@ config IP_VS_SH_TAB_BITS
needs to be large enough to effectively fit all the destinations
multiplied by their respective weights.
comment 'IPVS MH scheduler'
config IP_VS_MH_TAB_INDEX
int "IPVS maglev hashing table index of size (the prime numbers)"
range 8 17
default 12
---help---
The maglev hashing scheduler maps source IPs to destinations
stored in a hash table. This table is populated from a preference
list of positions for each destination until all slots in
the table are filled. The index selects the prime used as the
size of the table: 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
65521 or 131071. When using weights to allow destinations to
receive more connections, the table is assigned an amount
proportional to the weights specified. The table needs to be large
enough to effectively fit all the destinations multiplied by their
respective weights.
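With the default index of 12, for example, the scheduler below resolves this to primes[12 - 8] = 4093 table slots and uses 12 / 2 = 6 bits when right-shifting large weights.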
comment 'IPVS application helper'
config IP_VS_FTP

View File

@ -33,6 +33,7 @@ obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o
obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o
obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o
obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o

View File

@ -821,6 +821,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
if (add && udest->af != svc->af)
ipvs->mixed_address_family_dests++;
/* keep the last_weight with latest non-0 weight */
if (add || udest->weight != 0)
atomic_set(&dest->last_weight, udest->weight);
/* set the weight and the flags */
atomic_set(&dest->weight, udest->weight);
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;

View File

@ -43,6 +43,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/hash.h>
#include <net/ip_vs.h>
@ -81,7 +82,7 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
return (ntohl(addr_fold)*2654435761UL) & IP_VS_DH_TAB_MASK;
return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
}
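This change (repeated in ip_vs_lblc, ip_vs_lblcr and ip_vs_sh below) replaces the open-coded Knuth multiplicative hash, which masked off the low bits of the product, with hash_32(), which keeps the high bits. A minimal userspace sketch, assuming the GOLDEN_RATIO_32 constant and hash_32() body from <linux/hash.h>; the table parameters are illustrative:

#include <stdio.h>
#include <stdint.h>

#define GOLDEN_RATIO_32 0x61C88647u
#define TAB_BITS 8
#define TAB_MASK ((1u << TAB_BITS) - 1)

/* multiplicative hash keeping the high bits, as in <linux/hash.h> */
static uint32_t hash_32(uint32_t val, unsigned int bits)
{
	return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

int main(void)
{
	uint32_t addr = 0xc0a80001;	/* 192.168.0.1 in host order */

	/* old: low bits of the product, masked */
	printf("old: %u\n", (uint32_t)(addr * 2654435761u) & TAB_MASK);
	/* new: high bits of the product */
	printf("new: %u\n", hash_32(addr, TAB_BITS));
	return 0;
}

The high bits of a multiplicative hash are the better-mixed ones, which is the usual rationale for preferring hash_32() over an ad-hoc multiply-and-mask.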

View File

@ -48,6 +48,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>
#include <linux/hash.h>
/* for sysctl */
#include <linux/fs.h>
@ -160,7 +161,7 @@ ip_vs_lblc_hashkey(int af, const union nf_inet_addr *addr)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLC_TAB_MASK;
return hash_32(ntohl(addr_fold), IP_VS_LBLC_TAB_BITS);
}
@ -371,6 +372,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
tbl->counter = 1;
tbl->dead = false;
tbl->svc = svc;
atomic_set(&tbl->entries, 0);
/*
* Hook periodic timer for garbage collection

View File

@ -47,6 +47,7 @@
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/hash.h>
/* for sysctl */
#include <linux/fs.h>
@ -323,7 +324,7 @@ ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
return hash_32(ntohl(addr_fold), IP_VS_LBLCR_TAB_BITS);
}
@ -534,6 +535,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
tbl->counter = 1;
tbl->dead = false;
tbl->svc = svc;
atomic_set(&tbl->entries, 0);
/*
* Hook periodic timer for garbage collection

View File

@ -0,0 +1,540 @@
// SPDX-License-Identifier: GPL-2.0
/* IPVS: Maglev Hashing scheduling module
*
* Authors: Inju Song <inju.song@navercorp.com>
*
*/
/* The mh algorithm assigns a preference list of all the lookup
* table positions to each destination and populates the table with
* each destination's most-preferred position. It then selects a
* destination by hashing the source IP address and looking it up
* in the lookup table.
*
* The algorithm is detailed in:
* [3.4 Consistent Hashing]
* https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf
*
*/
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/ip.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/ip_vs.h>
#include <linux/siphash.h>
#include <linux/bitops.h>
#include <linux/gcd.h>
#define IP_VS_SVC_F_SCHED_MH_FALLBACK IP_VS_SVC_F_SCHED1 /* MH fallback */
#define IP_VS_SVC_F_SCHED_MH_PORT IP_VS_SVC_F_SCHED2 /* MH use port */
struct ip_vs_mh_lookup {
struct ip_vs_dest __rcu *dest; /* real server (cache) */
};
struct ip_vs_mh_dest_setup {
unsigned int offset; /* starting offset */
unsigned int skip; /* skip */
unsigned int perm; /* next_offset */
int turns; /* weight / gcd() and rshift */
};
/* Available prime numbers for MH table */
static int primes[] = {251, 509, 1021, 2039, 4093,
8191, 16381, 32749, 65521, 131071};
/* For IPVS MH entry hash table */
#ifndef CONFIG_IP_VS_MH_TAB_INDEX
#define CONFIG_IP_VS_MH_TAB_INDEX 12
#endif
#define IP_VS_MH_TAB_BITS (CONFIG_IP_VS_MH_TAB_INDEX / 2)
#define IP_VS_MH_TAB_INDEX (CONFIG_IP_VS_MH_TAB_INDEX - 8)
#define IP_VS_MH_TAB_SIZE primes[IP_VS_MH_TAB_INDEX]
struct ip_vs_mh_state {
struct rcu_head rcu_head;
struct ip_vs_mh_lookup *lookup;
struct ip_vs_mh_dest_setup *dest_setup;
hsiphash_key_t hash1, hash2;
int gcd;
int rshift;
};
static inline void generate_hash_secret(hsiphash_key_t *hash1,
hsiphash_key_t *hash2)
{
hash1->key[0] = 2654435761UL;
hash1->key[1] = 2654435761UL;
hash2->key[0] = 2654446892UL;
hash2->key[1] = 2654446892UL;
}
/* Helper function to determine if server is unavailable */
static inline bool is_unavailable(struct ip_vs_dest *dest)
{
return atomic_read(&dest->weight) <= 0 ||
dest->flags & IP_VS_DEST_F_OVERLOAD;
}
/* Returns hash value for IPVS MH entry */
static inline unsigned int
ip_vs_mh_hashkey(int af, const union nf_inet_addr *addr,
__be16 port, hsiphash_key_t *key, unsigned int offset)
{
unsigned int v;
__be32 addr_fold = addr->ip;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
addr_fold = addr->ip6[0] ^ addr->ip6[1] ^
addr->ip6[2] ^ addr->ip6[3];
#endif
v = (offset + ntohs(port) + ntohl(addr_fold));
return hsiphash(&v, sizeof(v), key);
}
/* Reset all the hash buckets of the specified table. */
static void ip_vs_mh_reset(struct ip_vs_mh_state *s)
{
int i;
struct ip_vs_mh_lookup *l;
struct ip_vs_dest *dest;
l = &s->lookup[0];
for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) {
dest = rcu_dereference_protected(l->dest, 1);
if (dest) {
ip_vs_dest_put(dest);
RCU_INIT_POINTER(l->dest, NULL);
}
l++;
}
}
static int ip_vs_mh_permutate(struct ip_vs_mh_state *s,
struct ip_vs_service *svc)
{
struct list_head *p;
struct ip_vs_mh_dest_setup *ds;
struct ip_vs_dest *dest;
int lw;
/* If gcd is smaller than 1, there are no dests or
* all dests have a zero last_weight. So, skip the
* permutation for the dests.
*/
if (s->gcd < 1)
return 0;
/* Set dest_setup for the dests permutation */
p = &svc->destinations;
ds = &s->dest_setup[0];
while ((p = p->next) != &svc->destinations) {
dest = list_entry(p, struct ip_vs_dest, n_list);
ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr,
dest->port, &s->hash1, 0) %
IP_VS_MH_TAB_SIZE;
ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr,
dest->port, &s->hash2, 0) %
(IP_VS_MH_TAB_SIZE - 1) + 1;
ds->perm = ds->offset;
lw = atomic_read(&dest->last_weight);
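/* Normalize the weight to turns: a dest with a non-zero
* last_weight always gets at least one turn, even when the
* shifted quotient rounds down to zero (GNU "a ?: b").
*/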
ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0);
ds++;
}
return 0;
}
static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
struct ip_vs_service *svc)
{
int n, c, dt_count;
unsigned long *table;
struct list_head *p;
struct ip_vs_mh_dest_setup *ds;
struct ip_vs_dest *dest, *new_dest;
/* If gcd is smaller than 1, there are no dests or
* all dests have a zero last_weight. So, skip the
* population for the dests and reset the lookup table.
*/
if (s->gcd < 1) {
ip_vs_mh_reset(s);
return 0;
}
table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
sizeof(unsigned long), GFP_KERNEL);
if (!table)
return -ENOMEM;
p = &svc->destinations;
n = 0;
dt_count = 0;
while (n < IP_VS_MH_TAB_SIZE) {
if (p == &svc->destinations)
p = p->next;
ds = &s->dest_setup[0];
while (p != &svc->destinations) {
/* Ignore added server with zero weight */
if (ds->turns < 1) {
p = p->next;
ds++;
continue;
}
c = ds->perm;
while (test_bit(c, table)) {
/* Add skip, mod IP_VS_MH_TAB_SIZE */
ds->perm += ds->skip;
if (ds->perm >= IP_VS_MH_TAB_SIZE)
ds->perm -= IP_VS_MH_TAB_SIZE;
c = ds->perm;
}
__set_bit(c, table);
dest = rcu_dereference_protected(s->lookup[c].dest, 1);
new_dest = list_entry(p, struct ip_vs_dest, n_list);
if (dest != new_dest) {
if (dest)
ip_vs_dest_put(dest);
ip_vs_dest_hold(new_dest);
RCU_INIT_POINTER(s->lookup[c].dest, new_dest);
}
if (++n == IP_VS_MH_TAB_SIZE)
goto out;
if (++dt_count >= ds->turns) {
dt_count = 0;
p = p->next;
ds++;
}
}
}
out:
kfree(table);
return 0;
}
/* Get ip_vs_dest associated with supplied parameters. */
static inline struct ip_vs_dest *
ip_vs_mh_get(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
const union nf_inet_addr *addr, __be16 port)
{
unsigned int hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0)
% IP_VS_MH_TAB_SIZE;
struct ip_vs_dest *dest = rcu_dereference(s->lookup[hash].dest);
return (!dest || is_unavailable(dest)) ? NULL : dest;
}
/* As ip_vs_mh_get, but with fallback if selected server is unavailable */
static inline struct ip_vs_dest *
ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
const union nf_inet_addr *addr, __be16 port)
{
unsigned int offset, roffset;
unsigned int hash, ihash;
struct ip_vs_dest *dest;
/* First try the dest it's supposed to go to */
ihash = ip_vs_mh_hashkey(svc->af, addr, port,
&s->hash1, 0) % IP_VS_MH_TAB_SIZE;
dest = rcu_dereference(s->lookup[ihash].dest);
if (!dest)
return NULL;
if (!is_unavailable(dest))
return dest;
IP_VS_DBG_BUF(6, "MH: selected unavailable server %s:%u, reselecting",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
/* If the original dest is unavailable, loop around the table
* starting from ihash to find a new dest
*/
for (offset = 0; offset < IP_VS_MH_TAB_SIZE; offset++) {
roffset = (offset + ihash) % IP_VS_MH_TAB_SIZE;
hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
roffset) % IP_VS_MH_TAB_SIZE;
dest = rcu_dereference(s->lookup[hash].dest);
if (!dest)
break;
if (!is_unavailable(dest))
return dest;
IP_VS_DBG_BUF(6,
"MH: selected unavailable server %s:%u (offset %u), reselecting",
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port), roffset);
}
return NULL;
}
/* Assign all the hash buckets of the specified table with the service. */
static int ip_vs_mh_reassign(struct ip_vs_mh_state *s,
struct ip_vs_service *svc)
{
int ret;
if (svc->num_dests > IP_VS_MH_TAB_SIZE)
return -EINVAL;
if (svc->num_dests >= 1) {
s->dest_setup = kcalloc(svc->num_dests,
sizeof(struct ip_vs_mh_dest_setup),
GFP_KERNEL);
if (!s->dest_setup)
return -ENOMEM;
}
ip_vs_mh_permutate(s, svc);
ret = ip_vs_mh_populate(s, svc);
if (ret < 0)
goto out;
IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%u\n",
IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port));
out:
if (svc->num_dests >= 1) {
kfree(s->dest_setup);
s->dest_setup = NULL;
}
return ret;
}
static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc)
{
struct ip_vs_dest *dest;
int weight;
int g = 0;
list_for_each_entry(dest, &svc->destinations, n_list) {
weight = atomic_read(&dest->last_weight);
if (weight > 0) {
if (g > 0)
g = gcd(weight, g);
else
g = weight;
}
}
return g;
}
/* To avoid assigning a huge weight for the MH table,
* calculate the shift value with gcd.
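* E.g. last_weights {2000, 1000}: gcd = 1000, mw = 2000 / 1000 = 2,
* fls(2) = 2, so with IP_VS_MH_TAB_BITS = 6 the shift is
* max(2 - 6, 0) = 0 and the 2:1 turns ratio is kept.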
*/
static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd)
{
struct ip_vs_dest *dest;
int new_weight, weight = 0;
int mw, shift;
/* If gcd is smaller than 1, there are no dests or
* all dests have a zero last_weight. So, return a
* shift value of zero.
*/
if (gcd < 1)
return 0;
list_for_each_entry(dest, &svc->destinations, n_list) {
new_weight = atomic_read(&dest->last_weight);
if (new_weight > weight)
weight = new_weight;
}
/* Because gcd is greater than zero here, the maximum
* weight is also greater than zero, so mw is at least 1.
*/
mw = weight / gcd;
/* shift = occupied bits of weight/gcd - MH highest bits */
shift = fls(mw) - IP_VS_MH_TAB_BITS;
return (shift >= 0) ? shift : 0;
}
static void ip_vs_mh_state_free(struct rcu_head *head)
{
struct ip_vs_mh_state *s;
s = container_of(head, struct ip_vs_mh_state, rcu_head);
kfree(s->lookup);
kfree(s);
}
static int ip_vs_mh_init_svc(struct ip_vs_service *svc)
{
int ret;
struct ip_vs_mh_state *s;
/* Allocate the MH table for this service */
s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
s->lookup = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(struct ip_vs_mh_lookup),
GFP_KERNEL);
if (!s->lookup) {
kfree(s);
return -ENOMEM;
}
generate_hash_secret(&s->hash1, &s->hash2);
s->gcd = ip_vs_mh_gcd_weight(svc);
s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
IP_VS_DBG(6,
"MH lookup table (memory=%zdbytes) allocated for current service\n",
sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
/* Assign the lookup table with current dests */
ret = ip_vs_mh_reassign(s, svc);
if (ret < 0) {
ip_vs_mh_reset(s);
ip_vs_mh_state_free(&s->rcu_head);
return ret;
}
/* No more failures, attach state */
svc->sched_data = s;
return 0;
}
static void ip_vs_mh_done_svc(struct ip_vs_service *svc)
{
struct ip_vs_mh_state *s = svc->sched_data;
/* Got to clean up lookup entry here */
ip_vs_mh_reset(s);
call_rcu(&s->rcu_head, ip_vs_mh_state_free);
IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n",
sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE);
}
static int ip_vs_mh_dest_changed(struct ip_vs_service *svc,
struct ip_vs_dest *dest)
{
struct ip_vs_mh_state *s = svc->sched_data;
s->gcd = ip_vs_mh_gcd_weight(svc);
s->rshift = ip_vs_mh_shift_weight(svc, s->gcd);
/* Assign the lookup table with the updated service */
return ip_vs_mh_reassign(s, svc);
}
/* Helper function to get port number */
static inline __be16
ip_vs_mh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{
__be16 _ports[2], *ports;
/* At this point we know that we have a valid packet of some kind.
* Because ICMP packets are only guaranteed to have the first 8
* bytes, let's just grab the ports. Fortunately they're in the
* same position for all three of the protocols we care about.
*/
switch (iph->protocol) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_SCTP:
ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
&_ports);
if (unlikely(!ports))
return 0;
if (likely(!ip_vs_iph_inverse(iph)))
return ports[0];
else
return ports[1];
default:
return 0;
}
}
/* Maglev Hashing scheduling */
static struct ip_vs_dest *
ip_vs_mh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest;
struct ip_vs_mh_state *s;
__be16 port = 0;
const union nf_inet_addr *hash_addr;
hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
IP_VS_DBG(6, "%s : Scheduling...\n", __func__);
if (svc->flags & IP_VS_SVC_F_SCHED_MH_PORT)
port = ip_vs_mh_get_port(skb, iph);
s = (struct ip_vs_mh_state *)svc->sched_data;
if (svc->flags & IP_VS_SVC_F_SCHED_MH_FALLBACK)
dest = ip_vs_mh_get_fallback(svc, s, hash_addr, port);
else
dest = ip_vs_mh_get(svc, s, hash_addr, port);
if (!dest) {
ip_vs_scheduler_err(svc, "no destination available");
return NULL;
}
IP_VS_DBG_BUF(6, "MH: source IP address %s:%u --> server %s:%u\n",
IP_VS_DBG_ADDR(svc->af, hash_addr),
ntohs(port),
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
}
/* IPVS MH Scheduler structure */
static struct ip_vs_scheduler ip_vs_mh_scheduler = {
.name = "mh",
.refcnt = ATOMIC_INIT(0),
.module = THIS_MODULE,
.n_list = LIST_HEAD_INIT(ip_vs_mh_scheduler.n_list),
.init_service = ip_vs_mh_init_svc,
.done_service = ip_vs_mh_done_svc,
.add_dest = ip_vs_mh_dest_changed,
.del_dest = ip_vs_mh_dest_changed,
.upd_dest = ip_vs_mh_dest_changed,
.schedule = ip_vs_mh_schedule,
};
static int __init ip_vs_mh_init(void)
{
return register_ip_vs_scheduler(&ip_vs_mh_scheduler);
}
static void __exit ip_vs_mh_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_mh_scheduler);
rcu_barrier();
}
module_init(ip_vs_mh_init);
module_exit(ip_vs_mh_cleanup);
MODULE_DESCRIPTION("Maglev hashing ipvs scheduler");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Inju Song <inju.song@navercorp.com>");
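Once the module is loaded, the scheduler is selected by its name "mh"; with ipvsadm that would be, for example, "ipvsadm -A -t 10.0.0.1:80 -s mh" (address hypothetical). The fallback and source-port behaviours are toggled through the generic scheduler flags IP_VS_SVC_F_SCHED1 and IP_VS_SVC_F_SCHED2, aliased above as IP_VS_SVC_F_SCHED_MH_FALLBACK and IP_VS_SVC_F_SCHED_MH_PORT.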

View File

@ -436,7 +436,7 @@ static bool tcp_state_active(int state)
return tcp_state_active_table[state];
}
static struct tcp_states_t tcp_states [] = {
static struct tcp_states_t tcp_states[] = {
/* INPUT */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }},
@ -459,7 +459,7 @@ static struct tcp_states_t tcp_states [] = {
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
static struct tcp_states_t tcp_states_dos [] = {
static struct tcp_states_t tcp_states_dos[] = {
/* INPUT */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
/*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }},

View File

@ -96,7 +96,8 @@ ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
return (offset + hash_32(ntohs(port) + ntohl(addr_fold),
IP_VS_SH_TAB_BITS)) &
IP_VS_SH_TAB_MASK;
}

View File

@ -186,6 +186,7 @@ unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
seqcount_t nf_conntrack_generation __read_mostly;
static unsigned int nf_conntrack_hash_rnd __read_mostly;

View File

@ -566,8 +566,7 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
.timeout = 5 * 60,
};
/* don't make this __exit, since it's called from __init ! */
static void nf_conntrack_ftp_fini(void)
static void __exit nf_conntrack_ftp_fini(void)
{
nf_conntrack_helpers_unregister(ftp, ports_c * 2);
kfree(ftp_buffer);

View File

@ -232,8 +232,6 @@ static int help(struct sk_buff *skb, unsigned int protoff,
static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly;
static struct nf_conntrack_expect_policy irc_exp_policy;
static void nf_conntrack_irc_fini(void);
static int __init nf_conntrack_irc_init(void)
{
int i, ret;
@ -276,9 +274,7 @@ static int __init nf_conntrack_irc_init(void)
return 0;
}
/* This function is intentionally _NOT_ defined as __exit, because
* it is needed by the init function */
static void nf_conntrack_irc_fini(void)
static void __exit nf_conntrack_irc_fini(void)
{
nf_conntrack_helpers_unregister(irc, ports_c);
kfree(irc_buffer);

View File

@ -2205,6 +2205,9 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
goto nla_put_failure;
if (nla_put_be32(skb, CTA_STATS_GLOBAL_MAX_ENTRIES, htonl(nf_conntrack_max)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
return skb->len;

View File

@ -173,8 +173,7 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
.timeout = 5 * 60,
};
/* don't make this __exit, since it's called from __init ! */
static void nf_conntrack_sane_fini(void)
static void __exit nf_conntrack_sane_fini(void)
{
nf_conntrack_helpers_unregister(sane, ports_c * 2);
kfree(sane_buffer);

View File

@ -1617,7 +1617,7 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
},
};
static void nf_conntrack_sip_fini(void)
static void __exit nf_conntrack_sip_fini(void)
{
nf_conntrack_helpers_unregister(sip, ports_c * 4);
}

View File

@ -104,7 +104,7 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = {
.timeout = 5 * 60,
};
static void nf_conntrack_tftp_fini(void)
static void __exit nf_conntrack_tftp_fini(void)
{
nf_conntrack_helpers_unregister(tftp, ports_c * 2);
}

View File

@ -4,6 +4,8 @@
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
@ -16,6 +18,43 @@ struct flow_offload_entry {
struct rcu_head rcu_head;
};
static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
static void
flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
struct nf_flow_route *route,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
struct dst_entry *dst = route->tuple[dir].dst;
ft->dir = dir;
switch (ctt->src.l3num) {
case NFPROTO_IPV4:
ft->src_v4 = ctt->src.u3.in;
ft->dst_v4 = ctt->dst.u3.in;
ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
ft->src_v6 = ctt->src.u3.in6;
ft->dst_v6 = ctt->dst.u3.in6;
ft->mtu = ip6_dst_mtu_forward(dst);
break;
}
ft->l3proto = ctt->src.l3num;
ft->l4proto = ctt->dst.protonum;
ft->src_port = ctt->src.u.tcp.port;
ft->dst_port = ctt->dst.u.tcp.port;
ft->iifidx = route->tuple[dir].ifindex;
ft->oifidx = route->tuple[!dir].ifindex;
ft->dst_cache = dst;
}
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
@ -40,69 +79,12 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
entry->ct = ct;
switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
case NFPROTO_IPV4:
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
break;
case NFPROTO_IPV6:
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
break;
}
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
FLOW_OFFLOAD_DIR_ORIGINAL;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
FLOW_OFFLOAD_DIR_REPLY;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
if (ct->status & IPS_SRC_NAT)
flow->flags |= FLOW_OFFLOAD_SNAT;
else if (ct->status & IPS_DST_NAT)
if (ct->status & IPS_DST_NAT)
flow->flags |= FLOW_OFFLOAD_DNAT;
return flow;
@ -118,6 +100,43 @@ err_ct_refcnt:
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
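/* Helpers to hand an offloaded flow back to conntrack: force the
* TCP state to ESTABLISHED, clear the cached per-direction windows
* so window tracking is re-initialised from the next packets, and
* give the conntrack entry a fresh established/replied timeout.
*/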
static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
{
tcp->state = TCP_CONNTRACK_ESTABLISHED;
tcp->seen[0].td_maxwin = 0;
tcp->seen[1].td_maxwin = 0;
}
static void flow_offload_fixup_ct_state(struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
unsigned int *timeouts;
unsigned int timeout;
int l4num;
l4num = nf_ct_protonum(ct);
if (l4num == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
if (!l4proto)
return;
timeouts = l4proto->get_timeouts(net);
if (!timeouts)
return;
if (l4num == IPPROTO_TCP)
timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
else if (l4num == IPPROTO_UDP)
timeout = timeouts[UDP_CT_REPLIED];
else
return;
ct->timeout = nfct_time_stamp + timeout;
}
void flow_offload_free(struct flow_offload *flow)
{
struct flow_offload_entry *e;
@ -125,17 +144,46 @@ void flow_offload_free(struct flow_offload *flow)
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
e = container_of(flow, struct flow_offload_entry, flow);
nf_ct_delete(e->ct, 0, 0);
if (flow->flags & FLOW_OFFLOAD_DYING)
nf_ct_delete(e->ct, 0, 0);
nf_ct_put(e->ct);
kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);
void flow_offload_dead(struct flow_offload *flow)
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
flow->flags |= FLOW_OFFLOAD_DYING;
const struct flow_offload_tuple *tuple = data;
return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}
EXPORT_SYMBOL_GPL(flow_offload_dead);
static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple_rhash *tuplehash = data;
return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
return 1;
return 0;
}
static const struct rhashtable_params nf_flow_offload_rhash_params = {
.head_offset = offsetof(struct flow_offload_tuple_rhash, node),
.hashfn = flow_offload_hash,
.obj_hashfn = flow_offload_hash_obj,
.obj_cmpfn = flow_offload_hash_cmp,
.automatic_shrinking = true,
};
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
@ -143,10 +191,10 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
*flow_table->type->params);
nf_flow_offload_rhash_params);
rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
*flow_table->type->params);
nf_flow_offload_rhash_params);
return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
@ -154,22 +202,51 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
static void flow_offload_del(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
struct flow_offload_entry *e;
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
*flow_table->type->params);
nf_flow_offload_rhash_params);
rhashtable_remove_fast(&flow_table->rhashtable,
&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
*flow_table->type->params);
nf_flow_offload_rhash_params);
e = container_of(flow, struct flow_offload_entry, flow);
clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
flow_offload_free(flow);
}
void flow_offload_teardown(struct flow_offload *flow)
{
struct flow_offload_entry *e;
flow->flags |= FLOW_OFFLOAD_TEARDOWN;
e = container_of(flow, struct flow_offload_entry, flow);
flow_offload_fixup_ct_state(e->ct);
}
EXPORT_SYMBOL_GPL(flow_offload_teardown);
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple)
{
return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
*flow_table->type->params);
struct flow_offload_tuple_rhash *tuplehash;
struct flow_offload *flow;
int dir;
tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
nf_flow_offload_rhash_params);
if (!tuplehash)
return NULL;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
return NULL;
return tuplehash;
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
@ -216,11 +293,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
static inline bool nf_flow_is_dying(const struct flow_offload *flow)
{
return flow->flags & FLOW_OFFLOAD_DYING;
}
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
struct flow_offload_tuple_rhash *tuplehash;
@ -248,7 +320,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
if (nf_flow_has_expired(flow) ||
nf_flow_is_dying(flow))
(flow->flags & (FLOW_OFFLOAD_DYING |
FLOW_OFFLOAD_TEARDOWN)))
flow_offload_del(flow_table, flow);
}
out:
@ -258,7 +331,7 @@ out:
return 1;
}
void nf_flow_offload_work_gc(struct work_struct *work)
static void nf_flow_offload_work_gc(struct work_struct *work)
{
struct nf_flowtable *flow_table;
@ -266,42 +339,6 @@ void nf_flow_offload_work_gc(struct work_struct *work)
nf_flow_offload_gc_step(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple *tuple = data;
return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}
static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
const struct flow_offload_tuple_rhash *tuplehash = data;
return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}
static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
return 1;
return 0;
}
const struct rhashtable_params nf_flow_offload_rhash_params = {
.head_offset = offsetof(struct flow_offload_tuple_rhash, node),
.hashfn = flow_offload_hash,
.obj_hashfn = flow_offload_hash_obj,
.obj_cmpfn = flow_offload_hash_cmp,
.automatic_shrinking = true,
};
EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
__be16 port, __be16 new_port)
@ -419,33 +456,69 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
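/* Set up a flowtable: initialise the rhashtable, start the periodic
* garbage collector and register the table on the global flowtables
* list used by nf_flow_table_cleanup().
*/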
int nf_flow_table_init(struct nf_flowtable *flowtable)
{
int err;
INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
err = rhashtable_init(&flowtable->rhashtable,
&nf_flow_offload_rhash_params);
if (err < 0)
return err;
queue_delayed_work(system_power_efficient_wq,
&flowtable->gc_work, HZ);
mutex_lock(&flowtable_lock);
list_add(&flowtable->list, &flowtables);
mutex_unlock(&flowtable_lock);
return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_table_init);
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
struct net_device *dev = data;
if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
if (!dev) {
flow_offload_teardown(flow);
return;
}
flow_offload_dead(flow);
if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
flow->tuplehash[1].tuple.iifidx == dev->ifindex)
flow_offload_dead(flow);
}
static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
void *data)
struct net_device *dev)
{
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
flush_delayed_work(&flowtable->gc_work);
}
void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
struct nf_flowtable *flowtable;
mutex_lock(&flowtable_lock);
list_for_each_entry(flowtable, &flowtables, list)
nf_flow_table_iterate_cleanup(flowtable, dev);
mutex_unlock(&flowtable_lock);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
mutex_lock(&flowtable_lock);
list_del(&flow_table->list);
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
WARN_ON(!nf_flow_offload_gc_step(flow_table));
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

View File

@ -22,8 +22,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
static struct nf_flowtable_type flowtable_inet = {
.family = NFPROTO_INET,
.params = &nf_flow_offload_rhash_params,
.gc = nf_flow_offload_work_gc,
.init = nf_flow_table_init,
.free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,

View File

@ -0,0 +1,487 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>
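/* TCP FIN or RST tears the flow down so the remaining shutdown
* handshake is handed back to the conntrack slow path.
*/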
static int nf_flow_state_check(struct flow_offload *flow, int proto,
struct sk_buff *skb, unsigned int thoff)
{
struct tcphdr *tcph;
if (proto != IPPROTO_TCP)
return 0;
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
if (unlikely(tcph->fin || tcph->rst)) {
flow_offload_teardown(flow);
return -1;
}
return 0;
}
static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
struct tcphdr *tcph;
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
skb_try_make_writable(skb, thoff + sizeof(*tcph)))
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
return 0;
}
static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
__be32 addr, __be32 new_addr)
{
struct udphdr *udph;
if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
skb_try_make_writable(skb, thoff + sizeof(*udph)))
return -1;
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
new_addr, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
return 0;
}
static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
unsigned int thoff, __be32 addr,
__be32 new_addr)
{
switch (iph->protocol) {
case IPPROTO_TCP:
if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
case IPPROTO_UDP:
if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
}
return 0;
}
static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
struct iphdr *iph, unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = iph->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = iph->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
default:
return -1;
}
csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
struct iphdr *iph, unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
__be32 addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = iph->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
iph->daddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = iph->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
iph->saddr = new_addr;
break;
default:
return -1;
}
csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
unsigned int thoff, enum flow_offload_tuple_dir dir)
{
struct iphdr *iph = ip_hdr(skb);
if (flow->flags & FLOW_OFFLOAD_SNAT &&
(nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
if (flow->flags & FLOW_OFFLOAD_DNAT &&
(nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
return -1;
return 0;
}
static bool ip_has_options(unsigned int thoff)
{
return thoff != sizeof(struct iphdr);
}
static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
if (!pskb_may_pull(skb, sizeof(*iph)))
return -1;
iph = ip_hdr(skb);
thoff = iph->ihl * 4;
if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;
if (iph->protocol != IPPROTO_TCP &&
iph->protocol != IPPROTO_UDP)
return -1;
thoff = iph->ihl * 4;
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
tuple->src_port = ports->source;
tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET;
tuple->l4proto = iph->protocol;
tuple->iifidx = dev->ifindex;
return 0;
}
/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
}
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
const struct rtable *rt;
unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
if (skb->protocol != htons(ETH_P_IP))
return NF_ACCEPT;
if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
if (tuplehash == NULL)
return NF_ACCEPT;
outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
if (!outdev)
return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*iph)))
return NF_DROP;
thoff = ip_hdr(skb)->ihl * 4;
if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
return NF_ACCEPT;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
iph = ip_hdr(skb);
ip_decrease_ttl(iph);
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
struct in6_addr *addr,
struct in6_addr *new_addr)
{
struct tcphdr *tcph;
if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
skb_try_make_writable(skb, thoff + sizeof(*tcph)))
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
new_addr->s6_addr32, true);
return 0;
}
static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
struct in6_addr *addr,
struct in6_addr *new_addr)
{
struct udphdr *udph;
if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
skb_try_make_writable(skb, thoff + sizeof(*udph)))
return -1;
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
new_addr->s6_addr32, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
return 0;
}
static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff, struct in6_addr *addr,
struct in6_addr *new_addr)
{
switch (ip6h->nexthdr) {
case IPPROTO_TCP:
if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
case IPPROTO_UDP:
if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
return NF_DROP;
break;
}
return 0;
}
static int nf_flow_snat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = ip6h->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = ip6h->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
ip6h->daddr = new_addr;
break;
default:
return -1;
}
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = ip6h->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
ip6h->daddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = ip6h->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
default:
return -1;
}
return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb,
enum flow_offload_tuple_dir dir)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
unsigned int thoff = sizeof(*ip6h);
if (flow->flags & FLOW_OFFLOAD_SNAT &&
(nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
return -1;
if (flow->flags & FLOW_OFFLOAD_DNAT &&
(nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
return -1;
return 0;
}
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
if (!pskb_may_pull(skb, sizeof(*ip6h)))
return -1;
ip6h = ipv6_hdr(skb);
if (ip6h->nexthdr != IPPROTO_TCP &&
ip6h->nexthdr != IPPROTO_UDP)
return -1;
thoff = sizeof(*ip6h);
if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
return -1;
ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
tuple->src_port = ports->source;
tuple->dst_port = ports->dest;
tuple->l3proto = AF_INET6;
tuple->l4proto = ip6h->nexthdr;
tuple->iifidx = dev->ifindex;
return 0;
}
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
struct in6_addr *nexthop;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
if (skb->protocol != htons(ETH_P_IPV6))
return NF_ACCEPT;
if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
return NF_ACCEPT;
tuplehash = flow_offload_lookup(flow_table, &tuple);
if (tuplehash == NULL)
return NF_ACCEPT;
outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
if (!outdev)
return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
return NF_ACCEPT;
if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
sizeof(*ip6h)))
return NF_ACCEPT;
if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;
if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ipv6(flow, skb, dir) < 0)
return NF_DROP;
flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
ip6h = ipv6_hdr(skb);
ip6h->hop_limit--;
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
return NF_STOLEN;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);

View File

@ -157,7 +157,7 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
static int in_range(const struct nf_nat_l3proto *l3proto,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range)
const struct nf_nat_range2 *range)
{
/* If we are supposed to map IPs, then we must be in the
* range specified, otherwise let this drag us onto a new src IP.
@ -194,7 +194,7 @@ find_appropriate_src(struct net *net,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
const struct nf_nat_range2 *range)
{
unsigned int h = hash_by_src(net, tuple);
const struct nf_conn *ct;
@ -224,7 +224,7 @@ find_appropriate_src(struct net *net,
static void
find_best_ips_proto(const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
@ -298,7 +298,7 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
@ -349,9 +349,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
/* Only bother mapping if it's not already in range and unique */
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (l4proto->in_range(tuple, maniptype,
&range->min_proto,
&range->max_proto) &&
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto->in_range(tuple, maniptype,
&range->min_proto,
&range->max_proto) &&
(range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
goto out;
@ -360,7 +361,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
}
}
/* Last change: get protocol to try to obtain unique tuple. */
/* Last chance: get protocol to try to obtain unique tuple. */
l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
out:
rcu_read_unlock();
@ -381,7 +382,7 @@ EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype)
{
struct net *net = nf_ct_net(ct);
@ -459,7 +460,7 @@ __nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
(manip == NF_NAT_MANIP_SRC ?
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
struct nf_nat_range range = {
struct nf_nat_range2 range = {
.flags = NF_NAT_RANGE_MAP_IPS,
.min_addr = ip,
.max_addr = ip,
@ -702,7 +703,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
static int nfnetlink_parse_nat_proto(struct nlattr *attr,
const struct nf_conn *ct,
struct nf_nat_range *range)
struct nf_nat_range2 *range)
{
struct nlattr *tb[CTA_PROTONAT_MAX+1];
const struct nf_nat_l4proto *l4proto;
@ -730,7 +731,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
static int
nfnetlink_parse_nat(const struct nlattr *nat,
const struct nf_conn *ct, struct nf_nat_range *range,
const struct nf_conn *ct, struct nf_nat_range2 *range,
const struct nf_nat_l3proto *l3proto)
{
struct nlattr *tb[CTA_NAT_MAX+1];
@ -758,7 +759,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr)
{
struct nf_nat_range range;
struct nf_nat_range2 range;
const struct nf_nat_l3proto *l3proto;
int err;

View File

@ -191,7 +191,7 @@ EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
struct nf_nat_range range;
struct nf_nat_range2 range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);

View File

@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct,
u16 *rover)
@ -83,6 +83,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
: tuple->src.u.all);
} else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
off = prandom_u32();
} else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
off = (ntohs(*portptr) - ntohs(range->base_proto.all));
} else {
off = *rover;
}
@ -91,7 +93,8 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
*portptr = htons(min + off % range_size);
if (++i != range_size && nf_nat_used_tuple(tuple, ct))
continue;
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
NF_NAT_RANGE_PROTO_OFFSET)))
*rover = off;
return;
}
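
The new NF_NAT_RANGE_PROTO_OFFSET branch keeps each mapped port at a fixed distance from base_proto; a self-contained userspace sketch of the arithmetic (hypothetical numbers, collision retry via nf_nat_used_tuple() omitted):

#include <stdio.h>

/* new_port = min + (orig_port - base) % range_size */
static unsigned short map_port(unsigned short orig, unsigned short base,
			       unsigned short min, unsigned short max)
{
	unsigned int range_size = max - min + 1;
	unsigned int off = orig - base;

	return min + off % range_size;
}

int main(void)
{
	/* shift ports 4000+ down into 2000-2999, keeping relative order */
	printf("%u\n", map_port(4005, 4000, 2000, 2999)); /* prints 2005 */
	return 0;
}
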
@ -100,7 +103,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
struct nf_nat_range *range)
struct nf_nat_range2 *range)
{
if (tb[CTA_PROTONAT_PORT_MIN]) {
range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);

View File

@ -23,7 +23,7 @@ static u_int16_t dccp_port_rover;
static void
dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{

View File

@ -17,7 +17,7 @@ static u_int16_t nf_sctp_port_rover;
static void
sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{

View File

@ -23,7 +23,7 @@ static u16 tcp_port_rover;
static void
tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{

View File

@ -22,7 +22,7 @@ static u16 udp_port_rover;
static void
udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@ -100,7 +100,7 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
static void
udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{

View File

@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{

View File

@ -36,7 +36,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
__be32 newdst;
struct nf_nat_range newrange;
struct nf_nat_range2 newrange;
WARN_ON(hooknum != NF_INET_PRE_ROUTING &&
hooknum != NF_INET_LOCAL_OUT);
@ -82,10 +82,10 @@ EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4);
static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
unsigned int
nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
unsigned int hooknum)
{
struct nf_nat_range newrange;
struct nf_nat_range2 newrange;
struct in6_addr newdst;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;

View File

@ -316,7 +316,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
static void nf_nat_sip_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
struct nf_nat_range range;
struct nf_nat_range2 range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);

net/netfilter/nf_osf.c (new file, 218 lines)
View File

@ -0,0 +1,218 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/capability.h>
#include <linux/if.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/tcp.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/x_tables.h>
#include <net/netfilter/nf_log.h>
#include <linux/netfilter/nf_osf.h>
static inline int nf_osf_ttl(const struct sk_buff *skb,
const struct nf_osf_info *info,
unsigned char f_ttl)
{
const struct iphdr *ip = ip_hdr(skb);
if (info->flags & NF_OSF_TTL) {
if (info->ttl == NF_OSF_TTL_TRUE)
return ip->ttl == f_ttl;
if (info->ttl == NF_OSF_TTL_NOCHECK)
return 1;
else if (ip->ttl <= f_ttl)
return 1;
else {
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
int ret = 0;
for_ifa(in_dev) {
if (inet_ifa_match(ip->saddr, ifa)) {
ret = (ip->ttl == f_ttl);
break;
}
}
endfor_ifa(in_dev);
return ret;
}
}
return ip->ttl == f_ttl;
}
bool
nf_osf_match(const struct sk_buff *skb, u_int8_t family,
int hooknum, struct net_device *in, struct net_device *out,
const struct nf_osf_info *info, struct net *net,
const struct list_head *nf_osf_fingers)
{
const unsigned char *optp = NULL, *_optp = NULL;
unsigned int optsize = 0, check_WSS = 0;
int fmatch = FMATCH_WRONG, fcount = 0;
const struct iphdr *ip = ip_hdr(skb);
const struct nf_osf_user_finger *f;
unsigned char opts[MAX_IPOPTLEN];
const struct nf_osf_finger *kf;
u16 window, totlen, mss = 0;
const struct tcphdr *tcp;
struct tcphdr _tcph;
bool df;
tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
if (!tcp)
return false;
if (!tcp->syn)
return false;
totlen = ntohs(ip->tot_len);
df = ntohs(ip->frag_off) & IP_DF;
window = ntohs(tcp->window);
if (tcp->doff * 4 > sizeof(struct tcphdr)) {
optsize = tcp->doff * 4 - sizeof(struct tcphdr);
_optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
sizeof(struct tcphdr), optsize, opts);
}
list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) {
int foptsize, optnum;
f = &kf->finger;
if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre))
continue;
optp = _optp;
fmatch = FMATCH_WRONG;
if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl))
continue;
/*
* Should not happen if userspace parser was written correctly.
*/
if (f->wss.wc >= OSF_WSS_MAX)
continue;
/* Check options */
foptsize = 0;
for (optnum = 0; optnum < f->opt_num; ++optnum)
foptsize += f->opt[optnum].length;
if (foptsize > MAX_IPOPTLEN ||
optsize > MAX_IPOPTLEN ||
optsize != foptsize)
continue;
check_WSS = f->wss.wc;
for (optnum = 0; optnum < f->opt_num; ++optnum) {
if (f->opt[optnum].kind == (*optp)) {
__u32 len = f->opt[optnum].length;
const __u8 *optend = optp + len;
fmatch = FMATCH_OK;
switch (*optp) {
case OSFOPT_MSS:
mss = optp[3];
mss <<= 8;
mss |= optp[2];
mss = ntohs((__force __be16)mss);
break;
case OSFOPT_TS:
break;
}
optp = optend;
} else
fmatch = FMATCH_OPT_WRONG;
if (fmatch != FMATCH_OK)
break;
}
if (fmatch != FMATCH_OPT_WRONG) {
fmatch = FMATCH_WRONG;
switch (check_WSS) {
case OSF_WSS_PLAIN:
if (f->wss.val == 0 || window == f->wss.val)
fmatch = FMATCH_OK;
break;
case OSF_WSS_MSS:
/*
 * Some smart modems mangle the MSS, decreasing it to
 * SMART_MSS_2, so we check the standard, the decreased
 * and the fingerprint-provided MSS values.
 */
#define SMART_MSS_1 1460
#define SMART_MSS_2 1448
if (window == f->wss.val * mss ||
window == f->wss.val * SMART_MSS_1 ||
window == f->wss.val * SMART_MSS_2)
fmatch = FMATCH_OK;
break;
case OSF_WSS_MTU:
if (window == f->wss.val * (mss + 40) ||
window == f->wss.val * (SMART_MSS_1 + 40) ||
window == f->wss.val * (SMART_MSS_2 + 40))
fmatch = FMATCH_OK;
break;
case OSF_WSS_MODULO:
if ((window % f->wss.val) == 0)
fmatch = FMATCH_OK;
break;
}
}
if (fmatch != FMATCH_OK)
continue;
fcount++;
if (info->flags & NF_OSF_LOG)
nf_log_packet(net, family, hooknum, skb,
in, out, NULL,
"%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
f->genre, f->version, f->subtype,
&ip->saddr, ntohs(tcp->source),
&ip->daddr, ntohs(tcp->dest),
f->ttl - ip->ttl);
if ((info->flags & NF_OSF_LOG) &&
info->loglevel == NF_OSF_LOGLEVEL_FIRST)
break;
}
if (!fcount && (info->flags & NF_OSF_LOG))
nf_log_packet(net, family, hooknum, skb, in, out, NULL,
"Remote OS is not known: %pI4:%u -> %pI4:%u\n",
&ip->saddr, ntohs(tcp->source),
&ip->daddr, ntohs(tcp->dest));
if (fcount)
fmatch = FMATCH_OK;
return fmatch == FMATCH_OK;
}
EXPORT_SYMBOL_GPL(nf_osf_match);
MODULE_LICENSE("GPL");

File diff suppressed because it is too large

View File

@ -251,6 +251,9 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_payload_type,
&nft_dynset_type,
&nft_range_type,
&nft_meta_type,
&nft_rt_type,
&nft_exthdr_type,
};
int __init nf_tables_core_module_init(void)

View File

@ -37,7 +37,6 @@
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
#include <net/netns/generic.h>
#include <net/netfilter/nfnetlink_log.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
@ -47,6 +46,7 @@
#include "../bridge/br_private.h"
#endif
#define NFULNL_COPY_DISABLED 0xff
#define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */
#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */
@ -618,7 +618,7 @@ static const struct nf_loginfo default_loginfo = {
};
/* log handler for internal netfilter logging api */
void
static void
nfulnl_log_packet(struct net *net,
u_int8_t pf,
unsigned int hooknum,
@ -633,7 +633,7 @@ nfulnl_log_packet(struct net *net,
struct nfulnl_instance *inst;
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int plen;
unsigned int plen = 0;
struct nfnl_log_net *log = nfnl_log_pernet(net);
const struct nfnl_ct_hook *nfnl_ct = NULL;
struct nf_conn *ct = NULL;
@ -648,7 +648,6 @@ nfulnl_log_packet(struct net *net,
if (!inst)
return;
plen = 0;
if (prefix)
plen = strlen(prefix) + 1;
@ -760,7 +759,6 @@ alloc_failure:
/* FIXME: statistics */
goto unlock_and_release;
}
EXPORT_SYMBOL_GPL(nfulnl_log_packet);
static int
nfulnl_rcv_nl_event(struct notifier_block *this,

View File

@ -36,7 +36,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
u64 timeout;
void *elem;
if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
if (!atomic_add_unless(&set->nelems, 1, set->size))
return NULL;
timeout = priv->timeout ? : set->timeout;
@ -81,7 +81,7 @@ static void nft_dynset_eval(const struct nft_expr *expr,
if (priv->op == NFT_DYNSET_OP_UPDATE &&
nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
timeout = priv->timeout ? : set->timeout;
*nft_set_ext_expiration(ext) = jiffies + timeout;
*nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
}
if (sexpr != NULL)
@ -216,6 +216,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (err < 0)
goto err1;
if (set->size == 0)
set->size = 0xffff;
priv->set = set;
return 0;
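
The two hunks above ensure every dynamic set has a bound: sets created without a size now default to 0xffff, and nft_dynset_new() always charges the new element against that bound. A userspace sketch of the atomic_add_unless() semantics relied on here (non-atomic, purely illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Non-atomic stand-in for atomic_add_unless(&v, a, u): add a unless v == u. */
static bool add_unless(int *v, int a, int u)
{
	if (*v == u)
		return false;
	*v += a;
	return true;
}

int main(void)
{
	int nelems = 0, size = 0xffff;	/* the new default cap */

	while (add_unless(&nelems, 1, size))
		;
	printf("capped at %d elements\n", nelems);	/* 65535 */
	return 0;
}
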

View File

@ -10,11 +10,10 @@
#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/tcp.h>
@ -353,7 +352,6 @@ static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
return nft_exthdr_dump_common(skb, priv);
}
static struct nft_expr_type nft_exthdr_type;
static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
.type = &nft_exthdr_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
@ -407,27 +405,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-EOPNOTSUPP);
}
static struct nft_expr_type nft_exthdr_type __read_mostly = {
struct nft_expr_type nft_exthdr_type __read_mostly = {
.name = "exthdr",
.select_ops = nft_exthdr_select_ops,
.policy = nft_exthdr_policy,
.maxattr = NFTA_EXTHDR_MAX,
.owner = THIS_MODULE,
};
static int __init nft_exthdr_module_init(void)
{
return nft_register_expr(&nft_exthdr_type);
}
static void __exit nft_exthdr_module_exit(void)
{
nft_unregister_expr(&nft_exthdr_type);
}
module_init(nft_exthdr_module_init);
module_exit(nft_exthdr_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("exthdr");

View File

@ -142,9 +142,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (!tb[NFTA_FLOW_TABLE_NAME])
return -EINVAL;
flowtable = nf_tables_flowtable_lookup(ctx->table,
tb[NFTA_FLOW_TABLE_NAME],
genmask);
flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
genmask);
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);

View File

@ -97,7 +97,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
priv->len = len;
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
if (priv->modulus <= 1)
if (priv->modulus < 1)
return -ERANGE;
if (priv->offset + priv->modulus - 1 < priv->offset)

View File

@ -1,5 +1,7 @@
/*
* Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
* Copyright (c) 2014 Intel Corporation
* Author: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@ -9,8 +11,6 @@
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@ -24,21 +24,35 @@
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nft_meta.h>
#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
struct nft_meta {
enum nft_meta_keys key:8;
union {
enum nft_registers dreg:8;
enum nft_registers sreg:8;
};
};
static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
#ifdef CONFIG_NF_TABLES_BRIDGE
#include "../bridge/br_private.h"
#endif
static void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
struct sock *sk;
u32 *dest = &regs->data[priv->dreg];
#ifdef CONFIG_NF_TABLES_BRIDGE
const struct net_bridge_port *p;
#endif
switch (priv->key) {
case NFT_META_LEN:
@ -214,6 +228,18 @@ void nft_meta_get_eval(const struct nft_expr *expr,
case NFT_META_SECPATH:
nft_reg_store8(dest, !!skb->sp);
break;
#endif
#ifdef CONFIG_NF_TABLES_BRIDGE
case NFT_META_BRI_IIFNAME:
if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
goto err;
strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
return;
case NFT_META_BRI_OIFNAME:
if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
goto err;
strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
return;
#endif
default:
WARN_ON(1);
@ -224,11 +250,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
err:
regs->verdict.code = NFT_BREAK;
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
static void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
@ -258,18 +283,16 @@ void nft_meta_set_eval(const struct nft_expr *expr,
WARN_ON(1);
}
}
EXPORT_SYMBOL_GPL(nft_meta_set_eval);
const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(nft_meta_policy);
int nft_meta_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
static int nft_meta_get_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int len;
@ -317,6 +340,14 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_SECPATH:
len = sizeof(u8);
break;
#endif
#ifdef CONFIG_NF_TABLES_BRIDGE
case NFT_META_BRI_IIFNAME:
case NFT_META_BRI_OIFNAME:
if (ctx->family != NFPROTO_BRIDGE)
return -EOPNOTSUPP;
len = IFNAMSIZ;
break;
#endif
default:
return -EOPNOTSUPP;
@ -326,7 +357,6 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, len);
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
static int nft_meta_get_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
@ -360,9 +390,9 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
#endif
}
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
static int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@ -388,11 +418,10 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
return nft_chain_validate_hooks(ctx->chain, hooks);
}
EXPORT_SYMBOL_GPL(nft_meta_set_validate);
int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
static int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int len;
@ -424,10 +453,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
return 0;
}
EXPORT_SYMBOL_GPL(nft_meta_set_init);
int nft_meta_get_dump(struct sk_buff *skb,
const struct nft_expr *expr)
static int nft_meta_get_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@ -440,10 +468,8 @@ int nft_meta_get_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
EXPORT_SYMBOL_GPL(nft_meta_get_dump);
int nft_meta_set_dump(struct sk_buff *skb,
const struct nft_expr *expr)
static int nft_meta_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@ -457,19 +483,16 @@ int nft_meta_set_dump(struct sk_buff *skb,
nla_put_failure:
return -1;
}
EXPORT_SYMBOL_GPL(nft_meta_set_dump);
void nft_meta_set_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
static void nft_meta_set_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
if (priv->key == NFT_META_NFTRACE)
static_branch_dec(&nft_trace_enabled);
}
EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
@ -508,27 +531,10 @@ nft_meta_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-EINVAL);
}
static struct nft_expr_type nft_meta_type __read_mostly = {
struct nft_expr_type nft_meta_type __read_mostly = {
.name = "meta",
.select_ops = nft_meta_select_ops,
.policy = nft_meta_policy,
.maxattr = NFTA_META_MAX,
.owner = THIS_MODULE,
};
static int __init nft_meta_module_init(void)
{
return nft_register_expr(&nft_meta_type);
}
static void __exit nft_meta_module_exit(void)
{
nft_unregister_expr(&nft_meta_type);
}
module_init(nft_meta_module_init);
module_exit(nft_meta_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_ALIAS_NFT_EXPR("meta");

View File

@ -43,7 +43,7 @@ static void nft_nat_eval(const struct nft_expr *expr,
const struct nft_nat *priv = nft_expr_priv(expr);
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
struct nf_nat_range range;
struct nf_nat_range2 range;
memset(&range, 0, sizeof(range));
if (priv->sreg_addr_min) {

View File

@ -24,13 +24,11 @@ struct nft_ng_inc {
u32 modulus;
atomic_t counter;
u32 offset;
struct nft_set *map;
};
static void nft_ng_inc_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
{
struct nft_ng_inc *priv = nft_expr_priv(expr);
u32 nval, oval;
do {
@ -38,7 +36,36 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
regs->data[priv->dreg] = nval + priv->offset;
return nval + priv->offset;
}
static void nft_ng_inc_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_ng_inc *priv = nft_expr_priv(expr);
regs->data[priv->dreg] = nft_ng_inc_gen(priv);
}
static void nft_ng_inc_map_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_ng_inc *priv = nft_expr_priv(expr);
const struct nft_set *map = priv->map;
const struct nft_set_ext *ext;
u32 result;
bool found;
result = nft_ng_inc_gen(priv);
found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
if (!found)
return;
nft_data_copy(&regs->data[priv->dreg],
nft_set_ext_data(ext), map->dlen);
}
static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
@ -46,6 +73,9 @@ static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
[NFTA_NG_MODULUS] = { .type = NLA_U32 },
[NFTA_NG_TYPE] = { .type = NLA_U32 },
[NFTA_NG_OFFSET] = { .type = NLA_U32 },
[NFTA_NG_SET_NAME] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_NG_SET_ID] = { .type = NLA_U32 },
};
static int nft_ng_inc_init(const struct nft_ctx *ctx,
@ -71,6 +101,25 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_ng_inc *priv = nft_expr_priv(expr);
u8 genmask = nft_genmask_next(ctx->net);
nft_ng_inc_init(ctx, expr, tb);
priv->map = nft_set_lookup_global(ctx->net, ctx->table,
tb[NFTA_NG_SET_NAME],
tb[NFTA_NG_SET_ID], genmask);
if (IS_ERR(priv->map))
return PTR_ERR(priv->map);
return 0;
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
u32 modulus, enum nft_ng_types type, u32 offset)
{
@ -97,6 +146,22 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
static int nft_ng_inc_map_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
const struct nft_ng_inc *priv = nft_expr_priv(expr);
if (nft_ng_dump(skb, priv->dreg, priv->modulus,
NFT_NG_INCREMENTAL, priv->offset) ||
nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
goto nla_put_failure;
return 0;
nla_put_failure:
return -1;
}
struct nft_ng_random {
enum nft_registers dreg:8;
u32 modulus;
@ -156,6 +221,14 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
.dump = nft_ng_inc_dump,
};
static const struct nft_expr_ops nft_ng_inc_map_ops = {
.type = &nft_ng_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
.eval = nft_ng_inc_map_eval,
.init = nft_ng_inc_map_init,
.dump = nft_ng_inc_map_dump,
};
static const struct nft_expr_ops nft_ng_random_ops = {
.type = &nft_ng_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
@ -178,6 +251,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
switch (type) {
case NFT_NG_INCREMENTAL:
if (tb[NFTA_NG_SET_NAME])
return &nft_ng_inc_map_ops;
return &nft_ng_inc_ops;
case NFT_NG_RANDOM:
return &nft_ng_random_ops;

View File

@ -38,8 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx,
return -EINVAL;
objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
genmask);
obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
genmask);
if (IS_ERR(obj))
return -ENOENT;

View File

@ -7,8 +7,6 @@
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@ -179,7 +177,6 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
return nft_chain_validate_hooks(ctx->chain, hooks);
}
static struct nft_expr_type nft_rt_type;
static const struct nft_expr_ops nft_rt_get_ops = {
.type = &nft_rt_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
@ -189,27 +186,10 @@ static const struct nft_expr_ops nft_rt_get_ops = {
.validate = nft_rt_validate,
};
static struct nft_expr_type nft_rt_type __read_mostly = {
struct nft_expr_type nft_rt_type __read_mostly = {
.name = "rt",
.ops = &nft_rt_get_ops,
.policy = nft_rt_policy,
.maxattr = NFTA_RT_MAX,
.owner = THIS_MODULE,
};
static int __init nft_rt_module_init(void)
{
return nft_register_expr(&nft_rt_type);
}
static void __exit nft_rt_module_exit(void)
{
nft_unregister_expr(&nft_rt_type);
}
module_init(nft_rt_module_init);
module_exit(nft_rt_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>");
MODULE_ALIAS_NFT_EXPR("rt");

View File

@ -296,27 +296,23 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
static struct nft_set_type nft_bitmap_type;
static struct nft_set_ops nft_bitmap_ops __read_mostly = {
.type = &nft_bitmap_type,
.privsize = nft_bitmap_privsize,
.elemsize = offsetof(struct nft_bitmap_elem, ext),
.estimate = nft_bitmap_estimate,
.init = nft_bitmap_init,
.destroy = nft_bitmap_destroy,
.insert = nft_bitmap_insert,
.remove = nft_bitmap_remove,
.deactivate = nft_bitmap_deactivate,
.flush = nft_bitmap_flush,
.activate = nft_bitmap_activate,
.lookup = nft_bitmap_lookup,
.walk = nft_bitmap_walk,
.get = nft_bitmap_get,
};
static struct nft_set_type nft_bitmap_type __read_mostly = {
.ops = &nft_bitmap_ops,
.owner = THIS_MODULE,
.ops = {
.privsize = nft_bitmap_privsize,
.elemsize = offsetof(struct nft_bitmap_elem, ext),
.estimate = nft_bitmap_estimate,
.init = nft_bitmap_init,
.destroy = nft_bitmap_destroy,
.insert = nft_bitmap_insert,
.remove = nft_bitmap_remove,
.deactivate = nft_bitmap_deactivate,
.flush = nft_bitmap_flush,
.activate = nft_bitmap_activate,
.lookup = nft_bitmap_lookup,
.walk = nft_bitmap_walk,
.get = nft_bitmap_get,
},
};
static int __init nft_bitmap_module_init(void)

View File

@ -605,6 +605,12 @@ static void nft_hash_destroy(const struct nft_set *set)
static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est)
{
if (!desc->size)
return false;
if (desc->klen == 4)
return false;
est->size = sizeof(struct nft_hash) +
nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
desc->size * sizeof(struct nft_hash_elem);
@ -614,91 +620,100 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
static struct nft_set_type nft_hash_type;
static struct nft_set_ops nft_rhash_ops __read_mostly = {
.type = &nft_hash_type,
.privsize = nft_rhash_privsize,
.elemsize = offsetof(struct nft_rhash_elem, ext),
.estimate = nft_rhash_estimate,
.init = nft_rhash_init,
.destroy = nft_rhash_destroy,
.insert = nft_rhash_insert,
.activate = nft_rhash_activate,
.deactivate = nft_rhash_deactivate,
.flush = nft_rhash_flush,
.remove = nft_rhash_remove,
.lookup = nft_rhash_lookup,
.update = nft_rhash_update,
.walk = nft_rhash_walk,
.get = nft_rhash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
};
static struct nft_set_ops nft_hash_ops __read_mostly = {
.type = &nft_hash_type,
.privsize = nft_hash_privsize,
.elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
.flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
.get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
static struct nft_set_ops nft_hash_fast_ops __read_mostly = {
.type = &nft_hash_type,
.privsize = nft_hash_privsize,
.elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
.flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup_fast,
.walk = nft_hash_walk,
.get = nft_hash_get,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
};
static const struct nft_set_ops *
nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
u32 flags)
static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est)
{
if (desc->size && !(flags & (NFT_SET_EVAL | NFT_SET_TIMEOUT))) {
switch (desc->klen) {
case 4:
return &nft_hash_fast_ops;
default:
return &nft_hash_ops;
}
}
if (!desc->size)
return false;
return &nft_rhash_ops;
if (desc->klen != 4)
return false;
est->size = sizeof(struct nft_hash) +
nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
desc->size * sizeof(struct nft_hash_elem);
est->lookup = NFT_SET_CLASS_O_1;
est->space = NFT_SET_CLASS_O_N;
return true;
}
static struct nft_set_type nft_hash_type __read_mostly = {
.select_ops = nft_hash_select_ops,
static struct nft_set_type nft_rhash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT | NFT_SET_EVAL,
.ops = {
.privsize = nft_rhash_privsize,
.elemsize = offsetof(struct nft_rhash_elem, ext),
.estimate = nft_rhash_estimate,
.init = nft_rhash_init,
.destroy = nft_rhash_destroy,
.insert = nft_rhash_insert,
.activate = nft_rhash_activate,
.deactivate = nft_rhash_deactivate,
.flush = nft_rhash_flush,
.remove = nft_rhash_remove,
.lookup = nft_rhash_lookup,
.update = nft_rhash_update,
.walk = nft_rhash_walk,
.get = nft_rhash_get,
},
};
static struct nft_set_type nft_hash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
.privsize = nft_hash_privsize,
.elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
.flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
.walk = nft_hash_walk,
.get = nft_hash_get,
},
};
static struct nft_set_type nft_hash_fast_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
.privsize = nft_hash_privsize,
.elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_fast_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
.insert = nft_hash_insert,
.activate = nft_hash_activate,
.deactivate = nft_hash_deactivate,
.flush = nft_hash_flush,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup_fast,
.walk = nft_hash_walk,
.get = nft_hash_get,
},
};
static int __init nft_hash_module_init(void)
{
return nft_register_set(&nft_hash_type);
if (nft_register_set(&nft_hash_fast_type) ||
nft_register_set(&nft_hash_type) ||
nft_register_set(&nft_rhash_type))
return 1;
return 0;
}
static void __exit nft_hash_module_exit(void)
{
nft_unregister_set(&nft_rhash_type);
nft_unregister_set(&nft_hash_type);
nft_unregister_set(&nft_hash_fast_type);
}
module_init(nft_hash_module_init);
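
Taken together, the split estimates make backend selection data-driven instead of routing through a per-module select_ops; an inferred summary (illustrative, derived only from the estimate functions above):

/*
 * Inferred selection matrix:
 *
 *   bounded set, 4-byte key        -> nft_hash_fast_type (nft_hash_lookup_fast)
 *   bounded set, other key length  -> nft_hash_type
 *   unbounded, timeouts or eval    -> nft_rhash_type (resizable rhashtable)
 *
 * Each estimate returns false for shapes it does not serve, so the core
 * can pick among registered set types without extra callbacks.
 */
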

View File

@ -393,28 +393,24 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
static struct nft_set_type nft_rbtree_type;
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.type = &nft_rbtree_type,
.privsize = nft_rbtree_privsize,
.elemsize = offsetof(struct nft_rbtree_elem, ext),
.estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
.deactivate = nft_rbtree_deactivate,
.flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.get = nft_rbtree_get,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
};
static struct nft_set_type nft_rbtree_type __read_mostly = {
.ops = &nft_rbtree_ops,
.owner = THIS_MODULE,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
.privsize = nft_rbtree_privsize,
.elemsize = offsetof(struct nft_rbtree_elem, ext),
.estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
.deactivate = nft_rbtree_deactivate,
.flush = nft_rbtree_flush,
.activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.get = nft_rbtree_get,
},
};
static int __init nft_rbtree_module_init(void)

View File

@ -21,8 +21,8 @@
static unsigned int
netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct nf_nat_range *range = par->targinfo;
struct nf_nat_range newrange;
const struct nf_nat_range2 *range = par->targinfo;
struct nf_nat_range2 newrange;
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
union nf_inet_addr new_addr, netmask;
@ -56,7 +56,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
{
const struct nf_nat_range *range = par->targinfo;
const struct nf_nat_range2 *range = par->targinfo;
if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
return -EINVAL;
@ -75,7 +75,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
enum ip_conntrack_info ctinfo;
__be32 new_ip, netmask;
const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
struct nf_nat_range newrange;
struct nf_nat_range2 newrange;
WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING &&
xt_hooknum(par) != NF_INET_POST_ROUTING &&

View File

@ -13,7 +13,6 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_NFLOG.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/nfnetlink_log.h>
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG");
@ -37,8 +36,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
if (info->flags & XT_NFLOG_F_COPY_LEN)
li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb,
xt_in(par), xt_out(par), &li, info->prefix);
nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
xt_out(par), &li, "%s", info->prefix);
return XT_CONTINUE;
}
@ -50,7 +50,13 @@ static int nflog_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
if (info->prefix[sizeof(info->prefix) - 1] != '\0')
return -EINVAL;
return 0;
return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
}
static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
{
nf_logger_put(par->family, NF_LOG_TYPE_ULOG);
}
static struct xt_target nflog_tg_reg __read_mostly = {
@ -58,6 +64,7 @@ static struct xt_target nflog_tg_reg __read_mostly = {
.revision = 0,
.family = NFPROTO_UNSPEC,
.checkentry = nflog_tg_check,
.destroy = nflog_tg_destroy,
.target = nflog_tg,
.targetsize = sizeof(struct xt_nflog_info),
.me = THIS_MODULE,

Some files were not shown because too many files have changed in this diff