Merge branch 'nf-ingress'
Pablo Neira Ayuso says: ==================== Netfilter ingress support (v4) This is the v4 round of patches to add the Netfilter ingress hook, it basically comes in two steps: 1) Add the CONFIG_NET_INGRESS switch to wrap the ingress static key around it. The idea is to use the same global static key to avoid adding more code to the hot path. 2) Add the Netfilter ingress hook after the tc ingress hook, under the global ingress_needed static key. As I said, the netfilter ingress hook also has its own static key, that is nested under the global static key. Please, see patch 5/5 for performance numbers and more information. I originally started this next round, as it was suggested, exploring the independent static key for netfilter ingress just after tc ingress, but the results that I gathered from that patch are not good for non-users: Result: OK: 6425927(c6425843+d83) usec, 100000000 (60byte,0frags) 15561955pps 7469Mb/sec (7469738400bps) errors: 100000000 this roughly means 500Kpps less performance wrt. the base numbers, so that's the reason why I discarded that approach and I focused on this. The idea of this patchset is to open the window to nf_tables, which comes with features that will work out-of-the-box (once the boilerplate code to support the 'netdev' table family is in place), to avoid repeating myself [1], the most relevant features are: 1) Multi-dimensional key dictionary lookups. 2) Arbitrary stateful flow tables. 3) Transactions and good support for dynamic updates. But there are also interesting aspects to consider from userspace, such as the ability to support new layer 2 protocols without kernel updates, a well-defined netlink interface, userspace libraries and utilities for third party applications, among others. I hope we can be happy with this approach. Please, apply. Thanks. [1] http://marc.info/?l=netfilter-devel&m=143033337020328&w=2 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
5a99e7f22b
|
@ -1656,6 +1656,9 @@ struct net_device {
|
|||
struct tcf_proto __rcu *ingress_cl_list;
|
||||
#endif
|
||||
struct netdev_queue __rcu *ingress_queue;
|
||||
#ifdef CONFIG_NETFILTER_INGRESS
|
||||
struct list_head nf_hooks_ingress;
|
||||
#endif
|
||||
|
||||
unsigned char broadcast[MAX_ADDR_LEN];
|
||||
#ifdef CONFIG_RFS_ACCEL
|
||||
|
|
|
@ -54,10 +54,12 @@ struct nf_hook_state {
|
|||
struct net_device *in;
|
||||
struct net_device *out;
|
||||
struct sock *sk;
|
||||
struct list_head *hook_list;
|
||||
int (*okfn)(struct sock *, struct sk_buff *);
|
||||
};
|
||||
|
||||
static inline void nf_hook_state_init(struct nf_hook_state *p,
|
||||
struct list_head *hook_list,
|
||||
unsigned int hook,
|
||||
int thresh, u_int8_t pf,
|
||||
struct net_device *indev,
|
||||
|
@ -71,6 +73,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
|
|||
p->in = indev;
|
||||
p->out = outdev;
|
||||
p->sk = sk;
|
||||
p->hook_list = hook_list;
|
||||
p->okfn = okfn;
|
||||
}
|
||||
|
||||
|
@ -79,16 +82,17 @@ typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
|
|||
const struct nf_hook_state *state);
|
||||
|
||||
struct nf_hook_ops {
|
||||
struct list_head list;
|
||||
struct list_head list;
|
||||
|
||||
/* User fills in from here down. */
|
||||
nf_hookfn *hook;
|
||||
struct module *owner;
|
||||
void *priv;
|
||||
u_int8_t pf;
|
||||
unsigned int hooknum;
|
||||
nf_hookfn *hook;
|
||||
struct net_device *dev;
|
||||
struct module *owner;
|
||||
void *priv;
|
||||
u_int8_t pf;
|
||||
unsigned int hooknum;
|
||||
/* Hooks are ordered in ascending priority. */
|
||||
int priority;
|
||||
int priority;
|
||||
};
|
||||
|
||||
struct nf_sockopt_ops {
|
||||
|
@ -131,26 +135,33 @@ extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
|||
#ifdef HAVE_JUMP_LABEL
|
||||
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
|
||||
|
||||
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
|
||||
static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
|
||||
u_int8_t pf, unsigned int hook)
|
||||
{
|
||||
if (__builtin_constant_p(pf) &&
|
||||
__builtin_constant_p(hook))
|
||||
return static_key_false(&nf_hooks_needed[pf][hook]);
|
||||
|
||||
return !list_empty(&nf_hooks[pf][hook]);
|
||||
return !list_empty(nf_hook_list);
|
||||
}
|
||||
#else
|
||||
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
|
||||
static inline bool nf_hook_list_active(struct list_head *nf_hook_list,
|
||||
u_int8_t pf, unsigned int hook)
|
||||
{
|
||||
return !list_empty(&nf_hooks[pf][hook]);
|
||||
return !list_empty(nf_hook_list);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
|
||||
{
|
||||
return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook);
|
||||
}
|
||||
|
||||
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
|
||||
|
||||
/**
|
||||
* nf_hook_thresh - call a netfilter hook
|
||||
*
|
||||
*
|
||||
* Returns 1 if the hook has allowed the packet to pass. The function
|
||||
* okfn must be invoked by the caller in this case. Any other return
|
||||
* value indicates the packet has been consumed by the hook.
|
||||
|
@ -166,8 +177,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
|
|||
if (nf_hooks_active(pf, hook)) {
|
||||
struct nf_hook_state state;
|
||||
|
||||
nf_hook_state_init(&state, hook, thresh, pf,
|
||||
indev, outdev, sk, okfn);
|
||||
nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh,
|
||||
pf, indev, outdev, sk, okfn);
|
||||
return nf_hook_slow(skb, &state);
|
||||
}
|
||||
return 1;
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
#ifndef _NETFILTER_INGRESS_H_
|
||||
#define _NETFILTER_INGRESS_H_
|
||||
|
||||
#include <linux/netfilter.h>
|
||||
#include <linux/netdevice.h>
|
||||
|
||||
#ifdef CONFIG_NETFILTER_INGRESS
|
||||
static inline int nf_hook_ingress_active(struct sk_buff *skb)
|
||||
{
|
||||
return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
|
||||
NFPROTO_NETDEV, NF_NETDEV_INGRESS);
|
||||
}
|
||||
|
||||
static inline int nf_hook_ingress(struct sk_buff *skb)
|
||||
{
|
||||
struct nf_hook_state state;
|
||||
|
||||
nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
|
||||
NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
|
||||
skb->dev, NULL, NULL);
|
||||
return nf_hook_slow(skb, &state);
|
||||
}
|
||||
|
||||
static inline void nf_hook_ingress_init(struct net_device *dev)
|
||||
{
|
||||
INIT_LIST_HEAD(&dev->nf_hooks_ingress);
|
||||
}
|
||||
#else /* CONFIG_NETFILTER_INGRESS */
|
||||
static inline int nf_hook_ingress_active(struct sk_buff *skb)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int nf_hook_ingress(struct sk_buff *skb)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void nf_hook_ingress_init(struct net_device *dev) {}
|
||||
#endif /* CONFIG_NETFILTER_INGRESS */
|
||||
#endif /* _NETFILTER_INGRESS_H_ */
|
|
@ -79,7 +79,7 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
|
|||
|
||||
struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
|
||||
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
#ifdef CONFIG_NET_INGRESS
|
||||
void net_inc_ingress_queue(void);
|
||||
void net_dec_ingress_queue(void);
|
||||
#endif
|
||||
|
|
|
@ -51,11 +51,17 @@ enum nf_inet_hooks {
|
|||
NF_INET_NUMHOOKS
|
||||
};
|
||||
|
||||
enum nf_dev_hooks {
|
||||
NF_NETDEV_INGRESS,
|
||||
NF_NETDEV_NUMHOOKS
|
||||
};
|
||||
|
||||
enum {
|
||||
NFPROTO_UNSPEC = 0,
|
||||
NFPROTO_INET = 1,
|
||||
NFPROTO_IPV4 = 2,
|
||||
NFPROTO_ARP = 3,
|
||||
NFPROTO_NETDEV = 5,
|
||||
NFPROTO_BRIDGE = 7,
|
||||
NFPROTO_IPV6 = 10,
|
||||
NFPROTO_DECNET = 12,
|
||||
|
|
|
@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES
|
|||
Newly written code should NEVER need this option but do
|
||||
compat-independent messages instead!
|
||||
|
||||
config NET_INGRESS
|
||||
bool
|
||||
|
||||
menu "Networking options"
|
||||
|
||||
source "net/packet/Kconfig"
|
||||
|
|
|
@ -135,6 +135,7 @@
|
|||
#include <linux/if_macvlan.h>
|
||||
#include <linux/errqueue.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/netfilter_ingress.h>
|
||||
|
||||
#include "net-sysfs.h"
|
||||
|
||||
|
@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
|
|||
}
|
||||
EXPORT_SYMBOL(call_netdevice_notifiers);
|
||||
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
#ifdef CONFIG_NET_INGRESS
|
||||
static struct static_key ingress_needed __read_mostly;
|
||||
|
||||
void net_inc_ingress_queue(void)
|
||||
|
@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
|
|||
|
||||
return skb;
|
||||
}
|
||||
#else
|
||||
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
|
||||
struct packet_type **pt_prev,
|
||||
int *ret, struct net_device *orig_dev)
|
||||
{
|
||||
return skb;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NETFILTER_INGRESS
|
||||
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
|
||||
int *ret, struct net_device *orig_dev)
|
||||
{
|
||||
if (nf_hook_ingress_active(skb)) {
|
||||
if (*pt_prev) {
|
||||
*ret = deliver_skb(skb, *pt_prev, orig_dev);
|
||||
*pt_prev = NULL;
|
||||
}
|
||||
|
||||
return nf_hook_ingress(skb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
|
||||
int *ret, struct net_device *orig_dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
|
||||
{
|
||||
struct packet_type *ptype, *pt_prev;
|
||||
|
@ -3798,13 +3828,17 @@ another_round:
|
|||
}
|
||||
|
||||
skip_taps:
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
#ifdef CONFIG_NET_INGRESS
|
||||
if (static_key_false(&ingress_needed)) {
|
||||
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
|
||||
if (!skb)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
|
||||
goto unlock;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_NET_CLS_ACT
|
||||
skb->tc_verd = 0;
|
||||
ncls:
|
||||
#endif
|
||||
|
@ -6967,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
|
|||
dev->group = INIT_NETDEV_GROUP;
|
||||
if (!dev->ethtool_ops)
|
||||
dev->ethtool_ops = &default_ethtool_ops;
|
||||
|
||||
nf_hook_ingress_init(dev);
|
||||
|
||||
return dev;
|
||||
|
||||
free_all:
|
||||
|
|
|
@ -1,6 +1,13 @@
|
|||
menu "Core Netfilter Configuration"
|
||||
depends on NET && INET && NETFILTER
|
||||
|
||||
config NETFILTER_INGRESS
|
||||
bool "Netfilter ingress support"
|
||||
select NET_INGRESS
|
||||
help
|
||||
This allows you to classify packets from ingress using the Netfilter
|
||||
infrastructure.
|
||||
|
||||
config NETFILTER_NETLINK
|
||||
tristate
|
||||
|
||||
|
|
|
@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);
|
|||
|
||||
int nf_register_hook(struct nf_hook_ops *reg)
|
||||
{
|
||||
struct list_head *nf_hook_list;
|
||||
struct nf_hook_ops *elem;
|
||||
|
||||
mutex_lock(&nf_hook_mutex);
|
||||
list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
|
||||
switch (reg->pf) {
|
||||
case NFPROTO_NETDEV:
|
||||
#ifdef CONFIG_NETFILTER_INGRESS
|
||||
if (reg->hooknum == NF_NETDEV_INGRESS) {
|
||||
BUG_ON(reg->dev == NULL);
|
||||
nf_hook_list = &reg->dev->nf_hooks_ingress;
|
||||
net_inc_ingress_queue();
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
/* Fall through. */
|
||||
default:
|
||||
nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
|
||||
break;
|
||||
}
|
||||
|
||||
list_for_each_entry(elem, nf_hook_list, list) {
|
||||
if (reg->priority < elem->priority)
|
||||
break;
|
||||
}
|
||||
|
@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
|
|||
mutex_lock(&nf_hook_mutex);
|
||||
list_del_rcu(&reg->list);
|
||||
mutex_unlock(&nf_hook_mutex);
|
||||
switch (reg->pf) {
|
||||
case NFPROTO_NETDEV:
|
||||
#ifdef CONFIG_NETFILTER_INGRESS
|
||||
if (reg->hooknum == NF_NETDEV_INGRESS) {
|
||||
net_dec_ingress_queue();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JUMP_LABEL
|
||||
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
|
||||
#endif
|
||||
|
@ -166,11 +195,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
|
|||
/* We may already have this, but read-locks nest anyway */
|
||||
rcu_read_lock();
|
||||
|
||||
elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
|
||||
struct nf_hook_ops, list);
|
||||
elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
|
||||
next_hook:
|
||||
verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
|
||||
&elem);
|
||||
verdict = nf_iterate(state->hook_list, skb, state, &elem);
|
||||
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
|
||||
ret = 1;
|
||||
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
|
||||
|
|
|
@ -312,6 +312,7 @@ config NET_SCH_PIE
|
|||
config NET_SCH_INGRESS
|
||||
tristate "Ingress Qdisc"
|
||||
depends on NET_CLS_ACT
|
||||
select NET_INGRESS
|
||||
---help---
|
||||
Say Y here if you want to use classifiers for incoming packets.
|
||||
If unsure, say Y.
|
||||
|
|
Loading…
Reference in New Issue