net: provide a sysctl raw_l3mdev_accept for raw socket lookup with VRFs
Add a sysctl raw_l3mdev_accept to control raw socket lookup in a manner similar to the use of tcp_l3mdev_accept for stream sockets and of udp_l3mdev_accept for datagram sockets. Have this default to enabled for reasons of backwards compatibility: it makes it possible to specify the output device with cmsg and IP_PKTINFO while using a socket not bound to the corresponding VRF. This allows e.g. older ping implementations to be run by specifying the device, without executing them in the VRF. If the option is disabled, packets received in a VRF context are only handled by a raw socket bound to the VRF, and correspondingly packets in the default VRF are only handled by a socket not bound to any VRF. Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com> Reviewed-by: David Ahern <dsahern@gmail.com> Tested-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
6da5b0f027
commit
6897445fb1
|
@ -370,6 +370,7 @@ tcp_l3mdev_accept - BOOLEAN
|
|||
derived from the listen socket to be bound to the L3 domain in
|
||||
which the packets originated. Only valid when the kernel was
|
||||
compiled with CONFIG_NET_L3_MASTER_DEV.
|
||||
Default: 0 (disabled)
|
||||
|
||||
tcp_low_latency - BOOLEAN
|
||||
This is a legacy option, it has no effect anymore.
|
||||
|
@ -773,6 +774,7 @@ udp_l3mdev_accept - BOOLEAN
|
|||
being received regardless of the L3 domain in which they
|
||||
originated. Only valid when the kernel was compiled with
|
||||
CONFIG_NET_L3_MASTER_DEV.
|
||||
Default: 0 (disabled)
|
||||
|
||||
udp_mem - vector of 3 INTEGERs: min, pressure, max
|
||||
Number of pages allowed for queueing by all UDP sockets.
|
||||
|
@ -799,6 +801,16 @@ udp_wmem_min - INTEGER
|
|||
total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
|
||||
Default: 4K
|
||||
|
||||
RAW variables:
|
||||
|
||||
raw_l3mdev_accept - BOOLEAN
|
||||
Enabling this option allows a "global" bound socket to work
|
||||
across L3 master domains (e.g., VRFs) with packets capable of
|
||||
being received regardless of the L3 domain in which they
|
||||
originated. Only valid when the kernel was compiled with
|
||||
CONFIG_NET_L3_MASTER_DEV.
|
||||
Default: 1 (enabled)
|
||||
|
||||
CIPSOv4 Variables:
|
||||
|
||||
cipso_cache_enable - BOOLEAN
|
||||
|
|
|
@ -111,9 +111,22 @@ the same port if they bind to an l3mdev.
|
|||
TCP & UDP services running in the default VRF context (i.e., not bound
|
||||
to any VRF device) can work across all VRF domains by enabling the
|
||||
tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
|
||||
|
||||
sysctl -w net.ipv4.tcp_l3mdev_accept=1
|
||||
sysctl -w net.ipv4.udp_l3mdev_accept=1
|
||||
|
||||
These options are disabled by default so that a socket in a VRF is only
|
||||
selected for packets in that VRF. There is a similar option for RAW
|
||||
sockets, which is enabled by default for reasons of backwards compatibility.
|
||||
This makes it possible to specify the output device with cmsg and IP_PKTINFO
|
||||
while using a socket not bound to the corresponding VRF. This allows e.g. older
|
||||
ping implementations to be run by specifying the device, without executing them
|
||||
in the VRF. This option can be disabled so that packets received in a VRF
|
||||
context are only handled by a raw socket bound to the VRF, and packets in the
|
||||
default VRF are only handled by a socket not bound to any VRF:
|
||||
|
||||
sysctl -w net.ipv4.raw_l3mdev_accept=0
|
||||
|
||||
netfilter rules on the VRF device can be used to limit access to services
|
||||
running in the default VRF context as well.
|
||||
|
||||
|
|
|
@ -103,6 +103,9 @@ struct netns_ipv4 {
|
|||
/* Shall we try to damage output packets if routing dev changes? */
|
||||
int sysctl_ip_dynaddr;
|
||||
int sysctl_ip_early_demux;
|
||||
#ifdef CONFIG_NET_L3_MASTER_DEV
|
||||
int sysctl_raw_l3mdev_accept;
|
||||
#endif
|
||||
int sysctl_tcp_early_demux;
|
||||
int sysctl_udp_early_demux;
|
||||
|
||||
|
|
|
@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v);
|
|||
|
||||
int raw_hash_sk(struct sock *sk);
|
||||
void raw_unhash_sk(struct sock *sk);
|
||||
void raw_init(void);
|
||||
|
||||
struct raw_sock {
|
||||
/* inet_sock has to be the first member */
|
||||
|
|
|
@ -1964,6 +1964,8 @@ static int __init inet_init(void)
|
|||
/* Add UDP-Lite (RFC 3828) */
|
||||
udplite4_register();
|
||||
|
||||
raw_init();
|
||||
|
||||
ping_init();
|
||||
|
||||
/*
|
||||
|
|
|
@ -805,7 +805,7 @@ out:
|
|||
return copied;
|
||||
}
|
||||
|
||||
static int raw_init(struct sock *sk)
|
||||
static int raw_sk_init(struct sock *sk)
|
||||
{
|
||||
struct raw_sock *rp = raw_sk(sk);
|
||||
|
||||
|
@ -970,7 +970,7 @@ struct proto raw_prot = {
|
|||
.connect = ip4_datagram_connect,
|
||||
.disconnect = __udp_disconnect,
|
||||
.ioctl = raw_ioctl,
|
||||
.init = raw_init,
|
||||
.init = raw_sk_init,
|
||||
.setsockopt = raw_setsockopt,
|
||||
.getsockopt = raw_getsockopt,
|
||||
.sendmsg = raw_sendmsg,
|
||||
|
@ -1133,4 +1133,28 @@ void __init raw_proc_exit(void)
|
|||
{
|
||||
unregister_pernet_subsys(&raw_net_ops);
|
||||
}
|
||||
|
||||
static void raw_sysctl_init_net(struct net *net)
|
||||
{
|
||||
#ifdef CONFIG_NET_L3_MASTER_DEV
|
||||
net->ipv4.sysctl_raw_l3mdev_accept = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int __net_init raw_sysctl_init(struct net *net)
|
||||
{
|
||||
raw_sysctl_init_net(net);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct pernet_operations __net_initdata raw_sysctl_ops = {
|
||||
.init = raw_sysctl_init,
|
||||
};
|
||||
|
||||
void __init raw_init(void)
|
||||
{
|
||||
raw_sysctl_init_net(&init_net);
|
||||
if (register_pernet_subsys(&raw_sysctl_ops))
|
||||
panic("RAW: failed to init sysctl parameters.\n");
|
||||
}
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
|
|
@ -602,6 +602,17 @@ static struct ctl_table ipv4_net_table[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = ipv4_ping_group_range,
|
||||
},
|
||||
#ifdef CONFIG_NET_L3_MASTER_DEV
|
||||
{
|
||||
.procname = "raw_l3mdev_accept",
|
||||
.data = &init_net.ipv4.sysctl_raw_l3mdev_accept,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.procname = "tcp_ecn",
|
||||
.data = &init_net.ipv4.sysctl_tcp_ecn,
|
||||
|
|
Loading…
Reference in New Issue