2007-12-17 05:29:36 +08:00
|
|
|
/*
 * ipv4 in net namespaces
 */
|
|
|
|
|
|
|
|
#ifndef __NETNS_IPV4_H__
|
|
|
|
#define __NETNS_IPV4_H__
|
2008-01-10 19:27:51 +08:00
|
|
|
|
2012-05-25 00:34:21 +08:00
|
|
|
#include <linux/uidgid.h>
|
2008-01-22 22:02:14 +08:00
|
|
|
#include <net/inet_frag.h>
|
|
|
|
|
2012-07-10 15:49:14 +08:00
|
|
|
/*
 * Forward declarations. This header only stores pointers to these types,
 * so their full definitions are not needed here.
 */
struct tcpm_hash_bucket;
struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
struct hlist_head;
struct fib_table;
struct sock;
|
2013-09-29 05:10:59 +08:00
|
|
|
struct local_ports {
|
|
|
|
seqlock_t lock;
|
|
|
|
int range[2];
|
|
|
|
};
|
2007-12-17 05:31:47 +08:00
|
|
|
|
2014-05-07 02:02:50 +08:00
|
|
|
struct ping_group_range {
|
|
|
|
seqlock_t lock;
|
|
|
|
kgid_t range[2];
|
|
|
|
};
|
|
|
|
|
2007-12-17 05:29:36 +08:00
|
|
|
struct netns_ipv4 {
|
2008-01-06 15:08:49 +08:00
|
|
|
#ifdef CONFIG_SYSCTL
|
2007-12-17 05:31:47 +08:00
|
|
|
struct ctl_table_header *forw_hdr;
|
2008-01-22 22:08:36 +08:00
|
|
|
struct ctl_table_header *frags_hdr;
|
2008-03-26 16:56:24 +08:00
|
|
|
struct ctl_table_header *ipv4_hdr;
|
2008-07-06 10:02:33 +08:00
|
|
|
struct ctl_table_header *route_hdr;
|
2013-02-06 17:46:33 +08:00
|
|
|
struct ctl_table_header *xfrm4_hdr;
|
2008-01-06 15:08:49 +08:00
|
|
|
#endif
|
2007-12-17 05:31:47 +08:00
|
|
|
struct ipv4_devconf *devconf_all;
|
|
|
|
struct ipv4_devconf *devconf_dflt;
|
2008-01-10 19:27:51 +08:00
|
|
|
#ifdef CONFIG_IP_MULTIPLE_TABLES
|
|
|
|
struct fib_rules_ops *rules_ops;
|
2012-07-06 13:13:13 +08:00
|
|
|
bool fib_has_custom_rules;
|
|
|
|
struct fib_table *fib_local;
|
|
|
|
struct fib_table *fib_main;
|
|
|
|
struct fib_table *fib_default;
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_IP_ROUTE_CLASSID
|
|
|
|
int fib_num_tclassid_users;
|
2008-01-10 19:27:51 +08:00
|
|
|
#endif
|
2008-01-10 19:28:24 +08:00
|
|
|
struct hlist_head *fib_table_hash;
|
2008-01-10 19:28:55 +08:00
|
|
|
struct sock *fibnl;
|
2008-01-22 22:02:14 +08:00
|
|
|
|
2015-01-30 07:58:09 +08:00
|
|
|
struct sock * __percpu *icmp_sk;
|
2015-02-26 01:58:35 +08:00
|
|
|
struct sock *mc_autojoin_sk;
|
2015-01-30 07:58:09 +08:00
|
|
|
|
2012-06-08 09:20:41 +08:00
|
|
|
struct inet_peer_base *peers;
|
2012-07-10 15:49:14 +08:00
|
|
|
struct tcpm_hash_bucket *tcp_metrics_hash;
|
2012-07-20 07:02:34 +08:00
|
|
|
unsigned int tcp_metrics_hash_log;
|
2015-01-30 13:35:05 +08:00
|
|
|
struct sock * __percpu *tcp_sk;
|
2008-01-22 22:02:14 +08:00
|
|
|
struct netns_frags frags;
|
2008-01-31 20:03:23 +08:00
|
|
|
#ifdef CONFIG_NETFILTER
|
|
|
|
struct xt_table *iptable_filter;
|
|
|
|
struct xt_table *iptable_mangle;
|
|
|
|
struct xt_table *iptable_raw;
|
2008-01-31 20:05:09 +08:00
|
|
|
struct xt_table *arptable_filter;
|
2010-01-18 15:08:37 +08:00
|
|
|
#ifdef CONFIG_SECURITY
|
2008-06-10 06:57:24 +08:00
|
|
|
struct xt_table *iptable_security;
|
2010-01-18 15:08:37 +08:00
|
|
|
#endif
|
2008-10-08 17:35:10 +08:00
|
|
|
struct xt_table *nat_table;
|
2008-01-31 20:03:23 +08:00
|
|
|
#endif
|
2008-03-26 16:55:37 +08:00
|
|
|
|
|
|
|
int sysctl_icmp_echo_ignore_all;
|
|
|
|
int sysctl_icmp_echo_ignore_broadcasts;
|
|
|
|
int sysctl_icmp_ignore_bogus_error_responses;
|
|
|
|
int sysctl_icmp_ratelimit;
|
|
|
|
int sysctl_icmp_ratemask;
|
|
|
|
int sysctl_icmp_errors_use_inbound_ifaddr;
|
2008-07-06 10:02:59 +08:00
|
|
|
|
2014-05-07 02:02:49 +08:00
|
|
|
struct local_ports ip_local_ports;
|
2013-09-29 05:10:59 +08:00
|
|
|
|
2013-01-06 00:10:48 +08:00
|
|
|
int sysctl_tcp_ecn;
|
2013-12-14 12:13:38 +08:00
|
|
|
int sysctl_ip_no_pmtu_disc;
|
2014-01-09 17:01:15 +08:00
|
|
|
int sysctl_ip_fwd_use_pmtu;
|
2014-09-05 21:09:03 +08:00
|
|
|
int sysctl_ip_nonlocal_bind;
|
2013-01-06 00:10:48 +08:00
|
|
|
|
2014-05-14 01:17:33 +08:00
|
|
|
int sysctl_fwmark_reflect;
|
net: support marking accepting TCP sockets
When using mark-based routing, sockets returned from accept()
may need to be marked differently depending on the incoming
connection request.
This is the case, for example, if different socket marks identify
different networks: a listening socket may want to accept
connections from all networks, but each connection should be
marked with the network that the request came in on, so that
subsequent packets are sent on the correct network.
This patch adds a sysctl to mark TCP sockets based on the fwmark
of the incoming SYN packet. If enabled, and an unmarked socket
receives a SYN, then the SYN packet's fwmark is written to the
connection's inet_request_sock, and later written back to the
accepted socket when the connection is established. If the
socket already has a nonzero mark, then the behaviour is the same
as it is today, i.e., the listening socket's fwmark is used.
Black-box tested using user-mode linux:
- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the
mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the
incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-14 01:17:35 +08:00
|
|
|
int sysctl_tcp_fwmark_accept;
|
2015-02-10 09:53:16 +08:00
|
|
|
int sysctl_tcp_mtu_probing;
|
|
|
|
int sysctl_tcp_base_mss;
|
2014-05-14 01:17:33 +08:00
|
|
|
|
2014-05-07 02:02:50 +08:00
|
|
|
struct ping_group_range ping_group_range;
|
net: ipv4: add IPPROTO_ICMP socket kind
This patch adds IPPROTO_ICMP socket kind. It makes it possible to send
ICMP_ECHO messages and receive the corresponding ICMP_ECHOREPLY messages
without any special privileges. In other words, the patch makes it
possible to implement setuid-less and CAP_NET_RAW-less /bin/ping. In
order not to increase the kernel's attack surface, the new functionality
is disabled by default, but is enabled at bootup by supporting Linux
distributions, optionally with restriction to a group or a group range
(see below).
Similar functionality is implemented in Mac OS X:
http://www.manpagez.com/man/4/icmp/
A new ping socket is created with
socket(PF_INET, SOCK_DGRAM, PROT_ICMP)
Message identifiers (octets 4-5 of ICMP header) are interpreted as local
ports. Addresses are stored in struct sockaddr_in. No port numbers are
reserved for privileged processes, port 0 is reserved for API ("let the
kernel pick a free number"). There is no notion of remote ports, remote
port numbers provided by the user (e.g. in connect()) are ignored.
Data sent and received include ICMP headers. This is deliberate to:
1) Avoid the need to transport headers values like sequence numbers by
other means.
2) Make it easier to port existing programs using raw sockets.
ICMP headers given to send() are checked and sanitized. The type must be
ICMP_ECHO and the code must be zero (future extensions might relax this,
see below). The id is set to the number (local port) of the socket, the
checksum is always recomputed.
ICMP reply packets received from the network are demultiplexed according
to their id's, and are returned by recv() without any modifications.
IP header information and ICMP errors of those packets may be obtained
via ancillary data (IP_RECVTTL, IP_RETOPTS, and IP_RECVERR). ICMP source
quenches and redirects are reported as fake errors via the error queue
(IP_RECVERR); the next hop address for redirects is saved to ee_info (in
network order).
socket(2) is restricted to the group range specified in
"/proc/sys/net/ipv4/ping_group_range". It is "1 0" by default, meaning
that nobody (not even root) may create ping sockets. Setting it to "100
100" would grant permissions to the single group (to either make
/sbin/ping g+s and owned by this group or to grant permissions to the
"netadmins" group), "0 4294967295" would enable it for the world, "100
4294967295" would enable it for the users, but not daemons.
The existing code might be (in the unlikely case anyone needs it)
extended rather easily to handle other similar pairs of ICMP messages
(Timestamp/Reply, Information Request/Reply, Address Mask Request/Reply
etc.).
Userspace ping util & patch for it:
http://openwall.info/wiki/people/segoon/ping
For Openwall GNU/*/Linux it was the last step on the road to the
setuid-less distro. A revision of this patch (for RHEL5/OpenVZ kernels)
is in use in Owl-current, such as in the 2011/03/12 LiveCD ISOs:
http://mirrors.kernel.org/openwall/Owl/current/iso/
Initially this functionality was written by Pavel Kankovsky for
Linux 2.4.32, but unfortunately it was never made public.
All ping options (-b, -p, -Q, -R, -s, -t, -T, -M, -I), are tested with
the patch.
PATCH v3:
- switched to flowi4.
- minor changes to be consistent with raw sockets code.
PATCH v2:
- changed ping_debug() to pr_debug().
- removed CONFIG_IP_PING.
- removed ping_seq_fops.owner field (unused for procfs).
- switched to proc_net_fops_create().
- switched to %pK in seq_printf().
PATCH v1:
- fixed checksumming bug.
- CAP_NET_RAW may not create icmp sockets anymore.
RFC v2:
- minor cleanups.
- introduced sysctl'able group range to restrict socket(2).
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-05-13 18:01:00 +08:00
|
|
|
|
2011-03-25 08:42:21 +08:00
|
|
|
atomic_t dev_addr_genid;
|
2009-01-22 12:56:15 +08:00
|
|
|
|
2014-05-13 07:04:53 +08:00
|
|
|
#ifdef CONFIG_SYSCTL
|
|
|
|
unsigned long *sysctl_local_reserved_ports;
|
|
|
|
#endif
|
|
|
|
|
2009-01-22 12:56:15 +08:00
|
|
|
#ifdef CONFIG_IP_MROUTE
|
ipv4: ipmr: support multiple tables
This patch adds support for multiple independant multicast routing instances,
named "tables".
Userspace multicast routing daemons can bind to a specific table instance by
issuing a setsockopt call using a new option MRT_TABLE. The table number is
stored in the raw socket data and affects all following ipmr setsockopt(),
getsockopt() and ioctl() calls. By default, a single table (RT_TABLE_DEFAULT)
is created with a default routing rule pointing to it. Newly created pimreg
devices have the table number appended ("pimregX"), with the exception of
devices created in the default table, which are named just "pimreg" for
compatibility reasons.
Packets are directed to a specific table instance using routing rules,
similar to how regular routing rules work. Currently iif, oif and mark
are supported as keys, source and destination addresses could be supported
additionally.
Example usage:
- bind pimd/xorp/... to a specific table:
uint32_t table = 123;
setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table));
- create routing rules directing packets to the new table:
# ip mrule add iif eth0 lookup 123
# ip mrule add oif eth0 lookup 123
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-13 13:03:23 +08:00
|
|
|
#ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
|
2010-04-13 13:03:22 +08:00
|
|
|
struct mr_table *mrt;
|
ipv4: ipmr: support multiple tables
This patch adds support for multiple independant multicast routing instances,
named "tables".
Userspace multicast routing daemons can bind to a specific table instance by
issuing a setsockopt call using a new option MRT_TABLE. The table number is
stored in the raw socket data and affects all following ipmr setsockopt(),
getsockopt() and ioctl() calls. By default, a single table (RT_TABLE_DEFAULT)
is created with a default routing rule pointing to it. Newly created pimreg
devices have the table number appended ("pimregX"), with the exception of
devices created in the default table, which are named just "pimreg" for
compatibility reasons.
Packets are directed to a specific table instance using routing rules,
similar to how regular routing rules work. Currently iif, oif and mark
are supported as keys, source and destination addresses could be supported
additionally.
Example usage:
- bind pimd/xorp/... to a specific table:
uint32_t table = 123;
setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table));
- create routing rules directing packets to the new table:
# ip mrule add iif eth0 lookup 123
# ip mrule add oif eth0 lookup 123
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2010-04-13 13:03:23 +08:00
|
|
|
#else
|
|
|
|
struct list_head mr_tables;
|
|
|
|
struct fib_rules_ops *mr_rules_ops;
|
|
|
|
#endif
|
2009-01-22 12:56:15 +08:00
|
|
|
#endif
|
2013-07-30 08:33:53 +08:00
|
|
|
atomic_t rt_genid;
|
2007-12-17 05:29:36 +08:00
|
|
|
};
|
|
|
|
#endif
|