OpenCloudOS-Kernel/include/linux/netlink.h

185 lines
5.5 KiB
C
Raw Normal View History

#ifndef __LINUX_NETLINK_H
#define __LINUX_NETLINK_H
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <net/scm.h>
#include <uapi/linux/netlink.h>
struct net;
static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
{
return (struct nlmsghdr *)skb->data;
}
enum netlink_skb_flags {
netlink: Only check file credentials for implicit destinations It was possible to get a setuid root or setcap executable to write to it's stdout or stderr (which has been set made a netlink socket) and inadvertently reconfigure the networking stack. To prevent this we check that both the creator of the socket and the currentl applications has permission to reconfigure the network stack. Unfortunately this breaks Zebra which always uses sendto/sendmsg and creates it's socket without any privileges. To keep Zebra working don't bother checking if the creator of the socket has privilege when a destination address is specified. Instead rely exclusively on the privileges of the sender of the socket. Note from Andy: This is exactly Eric's code except for some comment clarifications and formatting fixes. Neither I nor, I think, anyone else is thrilled with this approach, but I'm hesitant to wait on a better fix since 3.15 is almost here. Note to stable maintainers: This is a mess. An earlier series of patches in 3.15 fix a rather serious security issue (CVE-2014-0181), but they did so in a way that breaks Zebra. The offending series includes: commit aa4cf9452f469f16cea8c96283b641b4576d4a7b Author: Eric W. Biederman <ebiederm@xmission.com> Date: Wed Apr 23 14:28:03 2014 -0700 net: Add variants of capable for use on netlink messages If a given kernel version is missing that series of fixes, it's probably worth backporting it and this patch. if that series is present, then this fix is critical if you care about Zebra. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Andy Lutomirski <luto@amacapital.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-31 02:04:00 +08:00
NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */
NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */
NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */
NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */
};
struct netlink_skb_parms {
struct scm_creds creds; /* Skb credentials */
__u32 portid;
__u32 dst_group;
__u32 flags;
struct sock *sk;
bool nsid_is_set;
int nsid;
};
#define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb))
#define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds)
extern void netlink_table_grab(void);
extern void netlink_table_ungrab(void);
#define NL_CFG_F_NONROOT_RECV (1 << 0)
#define NL_CFG_F_NONROOT_SEND (1 << 1)
/* optional Netlink kernel configuration parameters */
struct netlink_kernel_cfg {
unsigned int groups;
unsigned int flags;
void (*input)(struct sk_buff *skb);
struct mutex *cb_mutex;
int (*bind)(struct net *net, int group);
void (*unbind)(struct net *net, int group);
bool (*compare)(struct net *net, struct sock *sk);
};
extern struct sock *__netlink_kernel_create(struct net *net, int unit,
struct module *module,
struct netlink_kernel_cfg *cfg);
static inline struct sock *
netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
{
return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
}
extern void netlink_kernel_release(struct sock *sk);
extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
extern int netlink_has_listeners(struct sock *sk, unsigned int group);
netlink, mmap: fix edge-case leakages in nf queue zero-copy When netlink mmap on receive side is the consumer of nf queue data, it can happen that in some edge cases, we write skb shared info into the user space mmap buffer: Assume a possible rx ring frame size of only 4096, and the network skb, which is being zero-copied into the netlink skb, contains page frags with an overall skb->len larger than the linear part of the netlink skb. skb_zerocopy(), which is generic and thus not aware of the fact that shared info cannot be accessed for such skbs then tries to write and fill frags, thus leaking kernel data/pointers and in some corner cases possibly writing out of bounds of the mmap area (when filling the last slot in the ring buffer this way). I.e. the ring buffer slot is then of status NL_MMAP_STATUS_VALID, has an advertised length larger than 4096, where the linear part is visible at the slot beginning, and the leaked sizeof(struct skb_shared_info) has been written to the beginning of the next slot (also corrupting the struct nl_mmap_hdr slot header incl. status etc), since skb->end points to skb->data + ring->frame_size - NL_MMAP_HDRLEN. The fix adds and lets __netlink_alloc_skb() take the actual needed linear room for the network skb + meta data into account. It's completely irrelevant for non-mmaped netlink sockets, but in case mmap sockets are used, it can be decided whether the available skb_tailroom() is really large enough for the buffer, or whether it needs to internally fallback to a normal alloc_skb(). >From nf queue side, the information whether the destination port is an mmap RX ring is not really available without extra port-to-socket lookup, thus it can only be determined in lower layers i.e. when __netlink_alloc_skb() is called that checks internally for this. I chose to add the extra ldiff parameter as mmap will then still work: We have data_len and hlen in nfqnl_build_packet_message(), data_len is the full length (capped at queue->copy_range) for skb_zerocopy() and hlen some possible part of data_len that needs to be copied; the rem_len variable indicates the needed remaining linear mmap space. The only other workaround in nf queue internally would be after allocation time by f.e. cap'ing the data_len to the skb_tailroom() iff we deal with an mmap skb, but that would 1) expose the fact that we use a mmap skb to upper layers, and 2) trim the skb where we otherwise could just have moved the full skb into the normal receive queue. After the patch, in my test case the ring slot doesn't fit and therefore shows NL_MMAP_STATUS_COPY, where a full skb carries all the data and thus needs to be picked up via recv(). Fixes: 3ab1f683bf8b ("nfnetlink: add support for memory mapped netlink") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-10 08:10:57 +08:00
extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
__u32 group, gfp_t allocation);
extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
__u32 portid, __u32 group, gfp_t allocation,
int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
void *filter_data);
extern int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code);
extern int netlink_register_notifier(struct notifier_block *nb);
extern int netlink_unregister_notifier(struct notifier_block *nb);
/* finegrained unicast helpers: */
struct sock *netlink_getsockbyfilp(struct file *filp);
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk);
void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
int netlink_sendskb(struct sock *sk, struct sk_buff *skb);
netlink: fix splat in skb_clone with large messages Since (c05cdb1 netlink: allow large data transfers from user-space), netlink splats if it invokes skb_clone on large netlink skbs since: * skb_shared_info was not correctly initialized. * skb->destructor is not set in the cloned skb. This was spotted by trinity: [ 894.990671] BUG: unable to handle kernel paging request at ffffc9000047b001 [ 894.991034] IP: [<ffffffff81a212c4>] skb_clone+0x24/0xc0 [...] [ 894.991034] Call Trace: [ 894.991034] [<ffffffff81ad299a>] nl_fib_input+0x6a/0x240 [ 894.991034] [<ffffffff81c3b7e6>] ? _raw_read_unlock+0x26/0x40 [ 894.991034] [<ffffffff81a5f189>] netlink_unicast+0x169/0x1e0 [ 894.991034] [<ffffffff81a601e1>] netlink_sendmsg+0x251/0x3d0 Fix it by: 1) introducing a new netlink_skb_clone function that is used in nl_fib_input, that sets our special skb->destructor in the cloned skb. Moreover, handle the release of the large cloned skb head area in the destructor path. 2) not allowing large skbuffs in the netlink broadcast path. I cannot find any reasonable use of the large data transfer using netlink in that path, moreover this helps to skip extra skb_clone handling. I found two more netlink clients that are cloning the skbs, but they are not in the sendmsg path. Therefore, the sole client cloning that I found seems to be the fib frontend. Thanks to Eric Dumazet for helping to address this issue. Reported-by: Fengguang Wu <fengguang.wu@intel.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: David S. Miller <davem@davemloft.net>
2013-06-28 09:04:23 +08:00
static inline struct sk_buff *
netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
struct sk_buff *nskb;
nskb = skb_clone(skb, gfp_mask);
if (!nskb)
return NULL;
/* This is a large skb, set destructor callback to release head */
if (is_vmalloc_addr(skb->head))
nskb->destructor = skb->destructor;
return nskb;
}
/*
* skb should fit one page. This choice is good for headerless malloc.
* But we should limit to 8K so that userspace does not have to
* use enormous buffer sizes on recvmsg() calls just to avoid
* MSG_TRUNC when PAGE_SIZE is very large.
*/
#if PAGE_SIZE < 8192UL
#define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(PAGE_SIZE)
#else
#define NLMSG_GOODSIZE SKB_WITH_OVERHEAD(8192UL)
#endif
#define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
struct netlink_callback {
struct sk_buff *skb;
const struct nlmsghdr *nlh;
int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff * skb,
struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
void *data;
/* the module that dump function belong to */
struct module *module;
u16 family;
u16 min_dump_alloc;
unsigned int prev_seq, seq;
long args[6];
};
struct netlink_notify {
struct net *net;
u32 portid;
int protocol;
};
struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags);
struct netlink_dump_control {
int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *skb, struct netlink_callback *);
int (*done)(struct netlink_callback *);
void *data;
struct module *module;
u16 min_dump_alloc;
};
extern int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct netlink_dump_control *control);
static inline int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct netlink_dump_control *control)
{
if (!control->module)
control->module = THIS_MODULE;
return __netlink_dump_start(ssk, skb, nlh, control);
}
struct netlink_tap {
struct net_device *dev;
struct module *module;
struct list_head list;
};
extern int netlink_add_tap(struct netlink_tap *nt);
extern int netlink_remove_tap(struct netlink_tap *nt);
bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
struct user_namespace *ns, int cap);
bool netlink_ns_capable(const struct sk_buff *skb,
struct user_namespace *ns, int cap);
bool netlink_capable(const struct sk_buff *skb, int cap);
bool netlink_net_capable(const struct sk_buff *skb, int cap);
#endif /* __LINUX_NETLINK_H */