OpenCloudOS-Kernel/net/llc/af_llc.c

1268 lines
31 KiB
C
Raw Normal View History

/*
* af_llc.c - LLC User Interface SAPs
* Description:
* Functions in this module are implementation of socket based llc
* communications for the Linux operating system. Support of llc class
* one and class two is provided via SOCK_DGRAM and SOCK_STREAM
* respectively.
*
* An llc2 connection is (mac + sap), only one llc2 sap connection
* is allowed per mac. Though one sap may have multiple mac + sap
* connections.
*
* Copyright (c) 2001 by Jay Schulist <jschlst@samba.org>
* 2002-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br>
*
* This program can be redistributed or modified under the terms of the
* GNU General Public License as published by the Free Software Foundation.
* This program is distributed without any warranty or implied warranty
* of merchantability or fitness for a particular purpose.
*
* See the GNU General Public License for more details.
*/
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <net/llc.h>
#include <net/llc_sap.h>
#include <net/llc_pdu.h>
#include <net/llc_conn.h>
#include <net/tcp_states.h>
/* remember: uninitialized global data is zeroed because its in .bss */
static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
static u16 llc_ui_sap_link_no_max[256];
static struct sockaddr_llc llc_ui_addrnull;
static const struct proto_ops llc_ui_ops;
static bool llc_ui_wait_for_conn(struct sock *sk, long timeout);
static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
#if 0
#define dprintk(args...) printk(KERN_DEBUG args)
#else
#define dprintk(args...)
#endif
/* Maybe we'll add some more in the future. */
#define LLC_CMSG_PKTINFO 1
/**
* llc_ui_next_link_no - return the next unused link number for a sap
* @sap: Address of sap to get link number from.
*
* Return the next unused link number for a given sap.
*/
static inline u16 llc_ui_next_link_no(int sap)
{
return llc_ui_sap_link_no_max[sap]++;
}
/**
* llc_proto_type - return eth protocol for ARP header type
* @arphrd: ARP header type.
*
* Given an ARP header type return the corresponding ethernet protocol.
*/
static inline __be16 llc_proto_type(u16 arphrd)
{
return htons(ETH_P_802_2);
}
/**
* llc_ui_addr_null - determines if a address structure is null
* @addr: Address to test if null.
*/
static inline u8 llc_ui_addr_null(struct sockaddr_llc *addr)
{
return !memcmp(addr, &llc_ui_addrnull, sizeof(*addr));
}
/**
* llc_ui_header_len - return length of llc header based on operation
* @sk: Socket which contains a valid llc socket type.
* @addr: Complete sockaddr_llc structure received from the user.
*
* Provide the length of the llc header depending on what kind of
* operation the user would like to perform and the type of socket.
* Returns the correct llc header length.
*/
static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
{
u8 rc = LLC_PDU_LEN_U;
if (addr->sllc_test || addr->sllc_xid)
rc = LLC_PDU_LEN_U;
else if (sk->sk_type == SOCK_STREAM)
rc = LLC_PDU_LEN_I;
return rc;
}
/**
* llc_ui_send_data - send data via reliable llc2 connection
* @sk: Connection the socket is using.
* @skb: Data the user wishes to send.
* @noblock: can we block waiting for data?
*
* Send data via reliable llc2 connection.
* Returns 0 upon success, non-zero if action did not succeed.
*/
static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
{
struct llc_sock* llc = llc_sk(sk);
int rc = 0;
if (unlikely(llc_data_accept_state(llc->state) ||
llc->remote_busy_flag ||
llc->p_flag)) {
long timeout = sock_sndtimeo(sk, noblock);
rc = llc_ui_wait_for_busy_core(sk, timeout);
}
if (unlikely(!rc))
rc = llc_build_and_send_pkt(sk, skb);
return rc;
}
static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
{
sock_graft(sk, sock);
sk->sk_type = sock->type;
sock->ops = &llc_ui_ops;
}
static struct proto llc_proto = {
.name = "LLC",
.owner = THIS_MODULE,
.obj_size = sizeof(struct llc_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,
};
/**
* llc_ui_create - alloc and init a new llc_ui socket
* @net: network namespace (must be default network)
* @sock: Socket to initialize and attach allocated sk to.
* @protocol: Unused.
* @kern: on behalf of kernel or userspace
*
* Allocate and initialize a new llc_ui socket, validate the user wants a
* socket type we have available.
* Returns 0 upon success, negative upon failure.
*/
static int llc_ui_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct sock *sk;
int rc = -ESOCKTNOSUPPORT;
if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) {
rc = -ENOMEM;
sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern);
if (sk) {
rc = 0;
llc_ui_sk_init(sock, sk);
}
}
return rc;
}
/**
* llc_ui_release - shutdown socket
* @sock: Socket to release.
*
* Shutdown and deallocate an existing socket.
*/
static int llc_ui_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct llc_sock *llc;
if (unlikely(sk == NULL))
goto out;
sock_hold(sk);
lock_sock(sk);
llc = llc_sk(sk);
dprintk("%s: closing local(%02X) remote(%02X)\n", __func__,
llc->laddr.lsap, llc->daddr.lsap);
if (!llc_send_disc(sk))
llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
if (!sock_flag(sk, SOCK_ZAPPED)) {
struct llc_sap *sap = llc->sap;
/* Hold this for release_sock(), so that llc_backlog_rcv()
* could still use it.
*/
llc_sap_hold(sap);
llc_sap_remove_socket(llc->sap, sk);
release_sock(sk);
llc_sap_put(sap);
} else {
release_sock(sk);
}
if (llc->dev)
dev_put(llc->dev);
sock_put(sk);
llc_sk_free(sk);
out:
return 0;
}
/**
* llc_ui_autoport - provide dynamically allocate SAP number
*
* Provide the caller with a dynamically allocated SAP number according
* to the rules that are set in this function. Returns: 0, upon failure,
* SAP number otherwise.
*/
static int llc_ui_autoport(void)
{
struct llc_sap *sap;
int i, tries = 0;
while (tries < LLC_SAP_DYN_TRIES) {
for (i = llc_ui_sap_last_autoport;
i < LLC_SAP_DYN_STOP; i += 2) {
sap = llc_sap_find(i);
if (!sap) {
llc_ui_sap_last_autoport = i + 2;
goto out;
}
llc_sap_put(sap);
}
llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
tries++;
}
i = 0;
out:
return i;
}
/**
* llc_ui_autobind - automatically bind a socket to a sap
* @sock: socket to bind
* @addr: address to connect to
*
* Used by llc_ui_connect and llc_ui_sendmsg when the user hasn't
* specifically used llc_ui_bind to bind to an specific address/sap
*
* Returns: 0 upon success, negative otherwise.
*/
static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
struct llc_sap *sap;
int rc = -EINVAL;
if (!sock_flag(sk, SOCK_ZAPPED))
goto out;
rc = -ENODEV;
if (sk->sk_bound_dev_if) {
llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
if (llc->dev && addr->sllc_arphrd != llc->dev->type) {
dev_put(llc->dev);
llc->dev = NULL;
}
} else
llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
if (!llc->dev)
goto out;
rc = -EUSERS;
llc->laddr.lsap = llc_ui_autoport();
if (!llc->laddr.lsap)
goto out;
rc = -EBUSY; /* some other network layer is using the sap */
sap = llc_sap_open(llc->laddr.lsap, NULL);
if (!sap)
goto out;
memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
/* assign new connection to its SAP */
llc_sap_add_socket(sap, sk);
sock_reset_flag(sk, SOCK_ZAPPED);
rc = 0;
out:
return rc;
}
/**
* llc_ui_bind - bind a socket to a specific address.
* @sock: Socket to bind an address to.
* @uaddr: Address the user wants the socket bound to.
* @addrlen: Length of the uaddr structure.
*
* Bind a socket to a specific address. For llc a user is able to bind to
* a specific sap only or mac + sap.
* If the user desires to bind to a specific mac + sap, it is possible to
* have multiple sap connections via multiple macs.
* Bind and autobind for that matter must enforce the correct sap usage
* otherwise all hell will break loose.
* Returns: 0 upon success, negative otherwise.
*/
static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
{
struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
struct llc_sap *sap;
int rc = -EINVAL;
dprintk("%s: binding %02X\n", __func__, addr->sllc_sap);
lock_sock(sk);
if (unlikely(!sock_flag(sk, SOCK_ZAPPED) || addrlen != sizeof(*addr)))
goto out;
rc = -EAFNOSUPPORT;
if (unlikely(addr->sllc_family != AF_LLC))
goto out;
rc = -ENODEV;
rcu_read_lock();
if (sk->sk_bound_dev_if) {
llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
if (llc->dev) {
if (!addr->sllc_arphrd)
addr->sllc_arphrd = llc->dev->type;
if (is_zero_ether_addr(addr->sllc_mac))
memcpy(addr->sllc_mac, llc->dev->dev_addr,
IFHWADDRLEN);
if (addr->sllc_arphrd != llc->dev->type ||
!ether_addr_equal(addr->sllc_mac,
llc->dev->dev_addr)) {
rc = -EINVAL;
llc->dev = NULL;
}
}
} else
llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
if (llc->dev)
dev_hold(llc->dev);
rcu_read_unlock();
if (!llc->dev)
goto out;
if (!addr->sllc_sap) {
rc = -EUSERS;
addr->sllc_sap = llc_ui_autoport();
if (!addr->sllc_sap)
goto out;
}
sap = llc_sap_find(addr->sllc_sap);
if (!sap) {
sap = llc_sap_open(addr->sllc_sap, NULL);
rc = -EBUSY; /* some other network layer is using the sap */
if (!sap)
goto out;
} else {
struct llc_addr laddr, daddr;
struct sock *ask;
memset(&laddr, 0, sizeof(laddr));
memset(&daddr, 0, sizeof(daddr));
/*
* FIXME: check if the address is multicast,
* only SOCK_DGRAM can do this.
*/
memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN);
laddr.lsap = addr->sllc_sap;
rc = -EADDRINUSE; /* mac + sap clash. */
ask = llc_lookup_established(sap, &daddr, &laddr);
if (ask) {
sock_put(ask);
goto out_put;
}
}
llc->laddr.lsap = addr->sllc_sap;
memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
/* assign new connection to its SAP */
llc_sap_add_socket(sap, sk);
sock_reset_flag(sk, SOCK_ZAPPED);
rc = 0;
out_put:
llc_sap_put(sap);
out:
release_sock(sk);
return rc;
}
/**
* llc_ui_shutdown - shutdown a connect llc2 socket.
* @sock: Socket to shutdown.
* @how: What part of the socket to shutdown.
*
* Shutdown a connected llc2 socket. Currently this function only supports
* shutting down both sends and receives (2), we could probably make this
* function such that a user can shutdown only half the connection but not
* right now.
* Returns: 0 upon success, negative otherwise.
*/
static int llc_ui_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
int rc = -ENOTCONN;
lock_sock(sk);
if (unlikely(sk->sk_state != TCP_ESTABLISHED))
goto out;
rc = -EINVAL;
if (how != 2)
goto out;
rc = llc_send_disc(sk);
if (!rc)
rc = llc_ui_wait_for_disc(sk, sk->sk_rcvtimeo);
/* Wake up anyone sleeping in poll */
sk->sk_state_change(sk);
out:
release_sock(sk);
return rc;
}
/**
* llc_ui_connect - Connect to a remote llc2 mac + sap.
* @sock: Socket which will be connected to the remote destination.
* @uaddr: Remote and possibly the local address of the new connection.
* @addrlen: Size of uaddr structure.
* @flags: Operational flags specified by the user.
*
* Connect to a remote llc2 mac + sap. The caller must specify the
* destination mac and address to connect to. If the user hasn't previously
* called bind(2) with a smac the address of the first interface of the
* specified arp type will be used.
* This function will autobind if user did not previously call bind.
* Returns: 0 upon success, negative otherwise.
*/
static int llc_ui_connect(struct socket *sock, struct sockaddr *uaddr,
int addrlen, int flags)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
int rc = -EINVAL;
lock_sock(sk);
if (unlikely(addrlen != sizeof(*addr)))
goto out;
rc = -EAFNOSUPPORT;
if (unlikely(addr->sllc_family != AF_LLC))
goto out;
if (unlikely(sk->sk_type != SOCK_STREAM))
goto out;
rc = -EALREADY;
if (unlikely(sock->state == SS_CONNECTING))
goto out;
/* bind connection to sap if user hasn't done it. */
if (sock_flag(sk, SOCK_ZAPPED)) {
/* bind to sap with null dev, exclusive */
rc = llc_ui_autobind(sock, addr);
if (rc)
goto out;
}
llc->daddr.lsap = addr->sllc_sap;
memcpy(llc->daddr.mac, addr->sllc_mac, IFHWADDRLEN);
sock->state = SS_CONNECTING;
sk->sk_state = TCP_SYN_SENT;
llc->link = llc_ui_next_link_no(llc->sap->laddr.lsap);
rc = llc_establish_connection(sk, llc->dev->dev_addr,
addr->sllc_mac, addr->sllc_sap);
if (rc) {
dprintk("%s: llc_ui_send_conn failed :-(\n", __func__);
sock->state = SS_UNCONNECTED;
sk->sk_state = TCP_CLOSE;
goto out;
}
if (sk->sk_state == TCP_SYN_SENT) {
const long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
if (!timeo || !llc_ui_wait_for_conn(sk, timeo))
goto out;
rc = sock_intr_errno(timeo);
if (signal_pending(current))
goto out;
}
if (sk->sk_state == TCP_CLOSE)
goto sock_error;
sock->state = SS_CONNECTED;
rc = 0;
out:
release_sock(sk);
return rc;
sock_error:
rc = sock_error(sk) ? : -ECONNABORTED;
sock->state = SS_UNCONNECTED;
goto out;
}
/**
* llc_ui_listen - allow a normal socket to accept incoming connections
* @sock: Socket to allow incoming connections on.
* @backlog: Number of connections to queue.
*
* Allow a normal socket to accept incoming connections.
* Returns 0 upon success, negative otherwise.
*/
static int llc_ui_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
int rc = -EINVAL;
lock_sock(sk);
if (unlikely(sock->state != SS_UNCONNECTED))
goto out;
rc = -EOPNOTSUPP;
if (unlikely(sk->sk_type != SOCK_STREAM))
goto out;
rc = -EAGAIN;
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
rc = 0;
if (!(unsigned int)backlog) /* BSDism */
backlog = 1;
sk->sk_max_ack_backlog = backlog;
if (sk->sk_state != TCP_LISTEN) {
sk->sk_ack_backlog = 0;
sk->sk_state = TCP_LISTEN;
}
sk->sk_socket->flags |= __SO_ACCEPTCON;
out:
release_sock(sk);
return rc;
}
static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
int rc = 0;
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
break;
rc = -ERESTARTSYS;
if (signal_pending(current))
break;
rc = -EAGAIN;
if (!timeout)
break;
rc = 0;
}
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
break;
if (signal_pending(current) || !timeout)
break;
}
remove_wait_queue(sk_sleep(sk), &wait);
return timeout;
}
static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct llc_sock *llc = llc_sk(sk);
int rc;
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
rc = 0;
if (sk_wait_event(sk, &timeout,
(sk->sk_shutdown & RCV_SHUTDOWN) ||
(!llc_data_accept_state(llc->state) &&
!llc->remote_busy_flag &&
!llc->p_flag), &wait))
break;
rc = -ERESTARTSYS;
if (signal_pending(current))
break;
rc = -EAGAIN;
if (!timeout)
break;
}
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
static int llc_wait_data(struct sock *sk, long timeo)
{
int rc;
while (1) {
/*
* POSIX 1003.1g mandates this order.
*/
rc = sock_error(sk);
if (rc)
break;
rc = 0;
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
rc = -EAGAIN;
if (!timeo)
break;
rc = sock_intr_errno(timeo);
if (signal_pending(current))
break;
rc = 0;
if (sk_wait_data(sk, &timeo, NULL))
break;
}
return rc;
}
static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
{
struct llc_sock *llc = llc_sk(skb->sk);
if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
struct llc_pktinfo info;
memset(&info, 0, sizeof(info));
info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
llc_pdu_decode_dsap(skb, &info.lpi_sap);
llc_pdu_decode_da(skb, info.lpi_mac);
put_cmsg(msg, SOL_LLC, LLC_OPT_PKTINFO, sizeof(info), &info);
}
}
/**
* llc_ui_accept - accept a new incoming connection.
* @sock: Socket which connections arrive on.
* @newsock: Socket to move incoming connection to.
* @flags: User specified operational flags.
net: Work around lockdep limitation in sockets that use sockets Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-09 16:09:05 +08:00
* @kern: If the socket is kernel internal
*
* Accept a new incoming connection.
* Returns 0 upon success, negative otherwise.
*/
net: Work around lockdep limitation in sockets that use sockets Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2017-03-09 16:09:05 +08:00
static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk, *newsk;
struct llc_sock *llc, *newllc;
struct sk_buff *skb;
int rc = -EOPNOTSUPP;
dprintk("%s: accepting on %02X\n", __func__,
llc_sk(sk)->laddr.lsap);
lock_sock(sk);
if (unlikely(sk->sk_type != SOCK_STREAM))
goto out;
rc = -EINVAL;
if (unlikely(sock->state != SS_UNCONNECTED ||
sk->sk_state != TCP_LISTEN))
goto out;
/* wait for a connection to arrive. */
if (skb_queue_empty(&sk->sk_receive_queue)) {
rc = llc_wait_data(sk, sk->sk_rcvtimeo);
if (rc)
goto out;
}
dprintk("%s: got a new connection on %02X\n", __func__,
llc_sk(sk)->laddr.lsap);
skb = skb_dequeue(&sk->sk_receive_queue);
rc = -EINVAL;
if (!skb->sk)
goto frees;
rc = 0;
newsk = skb->sk;
/* attach connection to a new socket. */
llc_ui_sk_init(newsock, newsk);
sock_reset_flag(newsk, SOCK_ZAPPED);
newsk->sk_state = TCP_ESTABLISHED;
newsock->state = SS_CONNECTED;
llc = llc_sk(sk);
newllc = llc_sk(newsk);
memcpy(&newllc->addr, &llc->addr, sizeof(newllc->addr));
newllc->link = llc_ui_next_link_no(newllc->laddr.lsap);
/* put original socket back into a clean listen state. */
sk->sk_state = TCP_LISTEN;
sk->sk_ack_backlog--;
dprintk("%s: ok success on %02X, client on %02X\n", __func__,
llc_sk(sk)->addr.sllc_sap, newllc->daddr.lsap);
frees:
kfree_skb(skb);
out:
release_sock(sk);
return rc;
}
/**
* llc_ui_recvmsg - copy received data to the socket user.
* @sock: Socket to copy data from.
* @msg: Various user space related information.
* @len: Size of user buffer.
* @flags: User specified flags.
*
* Copy received data to the socket user.
* Returns non-negative upon success, negative otherwise.
*/
static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
DECLARE_SOCKADDR(struct sockaddr_llc *, uaddr, msg->msg_name);
const int nonblock = flags & MSG_DONTWAIT;
struct sk_buff *skb = NULL;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
size_t copied = 0;
u32 peek_seq = 0;
u32 *seq, skb_len;
unsigned long used;
int target; /* Read at least this many bytes */
long timeo;
lock_sock(sk);
copied = -ENOTCONN;
if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN))
goto out;
timeo = sock_rcvtimeo(sk, nonblock);
seq = &llc->copied_seq;
if (flags & MSG_PEEK) {
peek_seq = llc->copied_seq;
seq = &peek_seq;
}
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
copied = 0;
do {
u32 offset;
/*
* We need to check signals first, to get correct SIGURG
* handling. FIXME: Need to check this doesn't impact 1003.1g
* and move it down to the bottom of the loop
*/
if (signal_pending(current)) {
if (copied)
break;
copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
break;
}
/* Next get a buffer. */
skb = skb_peek(&sk->sk_receive_queue);
if (skb) {
offset = *seq;
goto found_ok_skb;
}
/* Well, if we have backlog, try to process it now yet. */
if (copied >= target && !sk->sk_backlog.tail)
break;
if (copied) {
if (sk->sk_err ||
sk->sk_state == TCP_CLOSE ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
!timeo ||
(flags & MSG_PEEK))
break;
} else {
if (sock_flag(sk, SOCK_DONE))
break;
if (sk->sk_err) {
copied = sock_error(sk);
break;
}
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
if (sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_CLOSE) {
if (!sock_flag(sk, SOCK_DONE)) {
/*
* This occurs when user tries to read
* from never connected socket.
*/
copied = -ENOTCONN;
break;
}
break;
}
if (!timeo) {
copied = -EAGAIN;
break;
}
}
if (copied >= target) { /* Do not sleep, just process backlog. */
release_sock(sk);
lock_sock(sk);
} else
sk_wait_data(sk, &timeo, NULL);
if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) {
net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n",
current->comm,
task_pid_nr(current));
peek_seq = llc->copied_seq;
}
continue;
found_ok_skb:
skb_len = skb->len;
/* Ok so how much can we use? */
used = skb->len - offset;
if (len < used)
used = len;
if (!(flags & MSG_TRUNC)) {
int rc = skb_copy_datagram_msg(skb, offset, msg, used);
if (rc) {
/* Exception. Bailout! */
if (!copied)
copied = -EFAULT;
break;
}
}
*seq += used;
copied += used;
len -= used;
/* For non stream protcols we get one packet per recvmsg call */
if (sk->sk_type != SOCK_STREAM)
goto copy_uaddr;
if (!(flags & MSG_PEEK)) {
llc: do not use sk_eat_skb() syzkaller triggered a use-after-free [1], caused by a combination of skb_get() in llc_conn_state_process() and usage of sk_eat_skb() sk_eat_skb() is assuming the skb about to be freed is only used by the current thread. TCP/DCCP stacks enforce this because current thread holds the socket lock. llc_conn_state_process() wants to make sure skb does not disappear, and holds a reference on the skb it manipulates. But as soon as this skb is added to socket receive queue, another thread can consume it. This means that llc must use regular skb_unlink() and kfree_skb() so that both producer and consumer can safely work on the same skb. [1] BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: use-after-free in refcount_read include/linux/refcount.h:43 [inline] BUG: KASAN: use-after-free in skb_unref include/linux/skbuff.h:967 [inline] BUG: KASAN: use-after-free in kfree_skb+0xb7/0x580 net/core/skbuff.c:655 Read of size 4 at addr ffff8801d1f6fba4 by task ksoftirqd/1/18 CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.19.0-rc8+ #295 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b6 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] refcount_read include/linux/refcount.h:43 [inline] skb_unref include/linux/skbuff.h:967 [inline] kfree_skb+0xb7/0x580 net/core/skbuff.c:655 llc_sap_state_process+0x9b/0x550 net/llc/llc_sap.c:224 llc_sap_rcv+0x156/0x1f0 net/llc/llc_sap.c:297 llc_sap_handler+0x65e/0xf80 net/llc/llc_sap.c:438 llc_rcv+0x79e/0xe20 net/llc/llc_input.c:208 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 run_ksoftirqd+0x94/0x100 kernel/softirq.c:653 smpboot_thread_fn+0x68b/0xa00 kernel/smpboot.c:164 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413 Allocated by task 18: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc_node+0x144/0x730 mm/slab.c:3644 __alloc_skb+0x119/0x770 net/core/skbuff.c:193 alloc_skb include/linux/skbuff.h:995 [inline] llc_alloc_frame+0xbc/0x370 net/llc/llc_sap.c:54 llc_station_ac_send_xid_r net/llc/llc_station.c:52 [inline] llc_station_rcv+0x1dc/0x1420 net/llc/llc_station.c:111 llc_rcv+0xc32/0xe20 net/llc/llc_input.c:220 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 Freed by task 16383: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x83/0x290 mm/slab.c:3756 kfree_skbmem+0x154/0x230 net/core/skbuff.c:582 __kfree_skb+0x1d/0x20 net/core/skbuff.c:642 sk_eat_skb include/net/sock.h:2366 [inline] llc_ui_recvmsg+0xec2/0x1610 net/llc/af_llc.c:882 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg+0xd0/0x110 net/socket.c:801 ___sys_recvmsg+0x2b6/0x680 net/socket.c:2278 __sys_recvmmsg+0x303/0xb90 net/socket.c:2390 do_sys_recvmmsg+0x181/0x1a0 net/socket.c:2466 __do_sys_recvmmsg net/socket.c:2484 [inline] __se_sys_recvmmsg net/socket.c:2480 [inline] __x64_sys_recvmmsg+0xbe/0x150 net/socket.c:2480 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801d1f6fac0 which belongs to the cache skbuff_head_cache of size 232 The buggy address is located 228 bytes inside of 232-byte region [ffff8801d1f6fac0, ffff8801d1f6fba8) The buggy address belongs to the page: page:ffffea000747dbc0 count:1 mapcount:0 mapping:ffff8801d9be7680 index:0xffff8801d1f6fe80 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0007346e88 ffffea000705b108 ffff8801d9be7680 raw: ffff8801d1f6fe80 ffff8801d1f6f0c0 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801d1f6fa80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801d1f6fb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801d1f6fb80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc ^ ffff8801d1f6fc00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801d1f6fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-10-23 00:24:27 +08:00
skb_unlink(skb, &sk->sk_receive_queue);
kfree_skb(skb);
*seq = 0;
}
/* Partial read */
if (used + offset < skb_len)
continue;
} while (len > 0);
out:
release_sock(sk);
return copied;
copy_uaddr:
if (uaddr != NULL && skb != NULL) {
memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr));
msg->msg_namelen = sizeof(*uaddr);
}
if (llc_sk(sk)->cmsg_flags)
llc_cmsg_rcv(msg, skb);
if (!(flags & MSG_PEEK)) {
llc: do not use sk_eat_skb() syzkaller triggered a use-after-free [1], caused by a combination of skb_get() in llc_conn_state_process() and usage of sk_eat_skb() sk_eat_skb() is assuming the skb about to be freed is only used by the current thread. TCP/DCCP stacks enforce this because current thread holds the socket lock. llc_conn_state_process() wants to make sure skb does not disappear, and holds a reference on the skb it manipulates. But as soon as this skb is added to socket receive queue, another thread can consume it. This means that llc must use regular skb_unlink() and kfree_skb() so that both producer and consumer can safely work on the same skb. [1] BUG: KASAN: use-after-free in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] BUG: KASAN: use-after-free in refcount_read include/linux/refcount.h:43 [inline] BUG: KASAN: use-after-free in skb_unref include/linux/skbuff.h:967 [inline] BUG: KASAN: use-after-free in kfree_skb+0xb7/0x580 net/core/skbuff.c:655 Read of size 4 at addr ffff8801d1f6fba4 by task ksoftirqd/1/18 CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.19.0-rc8+ #295 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b6 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline] refcount_read include/linux/refcount.h:43 [inline] skb_unref include/linux/skbuff.h:967 [inline] kfree_skb+0xb7/0x580 net/core/skbuff.c:655 llc_sap_state_process+0x9b/0x550 net/llc/llc_sap.c:224 llc_sap_rcv+0x156/0x1f0 net/llc/llc_sap.c:297 llc_sap_handler+0x65e/0xf80 net/llc/llc_sap.c:438 llc_rcv+0x79e/0xe20 net/llc/llc_input.c:208 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 run_ksoftirqd+0x94/0x100 kernel/softirq.c:653 smpboot_thread_fn+0x68b/0xa00 kernel/smpboot.c:164 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413 Allocated by task 18: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc_node+0x144/0x730 mm/slab.c:3644 __alloc_skb+0x119/0x770 net/core/skbuff.c:193 alloc_skb include/linux/skbuff.h:995 [inline] llc_alloc_frame+0xbc/0x370 net/llc/llc_sap.c:54 llc_station_ac_send_xid_r net/llc/llc_station.c:52 [inline] llc_station_rcv+0x1dc/0x1420 net/llc/llc_station.c:111 llc_rcv+0xc32/0xe20 net/llc/llc_input.c:220 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 process_backlog+0x218/0x6f0 net/core/dev.c:5829 napi_poll net/core/dev.c:6249 [inline] net_rx_action+0x7c5/0x1950 net/core/dev.c:6315 __do_softirq+0x30c/0xb03 kernel/softirq.c:292 Freed by task 16383: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x83/0x290 mm/slab.c:3756 kfree_skbmem+0x154/0x230 net/core/skbuff.c:582 __kfree_skb+0x1d/0x20 net/core/skbuff.c:642 sk_eat_skb include/net/sock.h:2366 [inline] llc_ui_recvmsg+0xec2/0x1610 net/llc/af_llc.c:882 sock_recvmsg_nosec net/socket.c:794 [inline] sock_recvmsg+0xd0/0x110 net/socket.c:801 ___sys_recvmsg+0x2b6/0x680 net/socket.c:2278 __sys_recvmmsg+0x303/0xb90 net/socket.c:2390 do_sys_recvmmsg+0x181/0x1a0 net/socket.c:2466 __do_sys_recvmmsg net/socket.c:2484 [inline] __se_sys_recvmmsg net/socket.c:2480 [inline] __x64_sys_recvmmsg+0xbe/0x150 net/socket.c:2480 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801d1f6fac0 which belongs to the cache skbuff_head_cache of size 232 The buggy address is located 228 bytes inside of 232-byte region [ffff8801d1f6fac0, ffff8801d1f6fba8) The buggy address belongs to the page: page:ffffea000747dbc0 count:1 mapcount:0 mapping:ffff8801d9be7680 index:0xffff8801d1f6fe80 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0007346e88 ffffea000705b108 ffff8801d9be7680 raw: ffff8801d1f6fe80 ffff8801d1f6f0c0 000000010000000b 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801d1f6fa80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801d1f6fb00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8801d1f6fb80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc ^ ffff8801d1f6fc00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801d1f6fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-10-23 00:24:27 +08:00
skb_unlink(skb, &sk->sk_receive_queue);
kfree_skb(skb);
*seq = 0;
}
goto out;
}
/**
* llc_ui_sendmsg - Transmit data provided by the socket user.
* @sock: Socket to transmit data from.
* @msg: Various user related information.
* @len: Length of data to transmit.
*
* Transmit data provided by the socket user.
* Returns non-negative upon success, negative otherwise.
*/
static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
struct sk_buff *skb;
size_t size = 0;
int rc = -EINVAL, copied = 0, hdrlen;
dprintk("%s: sending from %02X to %02X\n", __func__,
llc->laddr.lsap, llc->daddr.lsap);
lock_sock(sk);
if (addr) {
if (msg->msg_namelen < sizeof(*addr))
goto release;
} else {
if (llc_ui_addr_null(&llc->addr))
goto release;
addr = &llc->addr;
}
/* must bind connection to sap if user hasn't done it. */
if (sock_flag(sk, SOCK_ZAPPED)) {
/* bind to sap with null dev, exclusive. */
rc = llc_ui_autobind(sock, addr);
if (rc)
goto release;
}
hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
size = hdrlen + len;
if (size > llc->dev->mtu)
size = llc->dev->mtu;
copied = size - hdrlen;
llc: better deal with too small mtu syzbot loves to set very small mtu on devices, since it brings joy. We must make llc_ui_sendmsg() fool proof. usercopy: Kernel memory overwrite attempt detected to wrapped address (offset 0, size 18446612139802320068)! kernel BUG at mm/usercopy.c:100! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 17464 Comm: syz-executor1 Not tainted 4.17.0-rc3+ #36 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:usercopy_abort+0xbb/0xbd mm/usercopy.c:88 RSP: 0018:ffff8801868bf800 EFLAGS: 00010282 RAX: 000000000000006c RBX: ffffffff87d2fb00 RCX: 0000000000000000 RDX: 000000000000006c RSI: ffffffff81610731 RDI: ffffed0030d17ef6 RBP: ffff8801868bf858 R08: ffff88018daa4200 R09: ffffed003b5c4fb0 R10: ffffed003b5c4fb0 R11: ffff8801dae27d87 R12: ffffffff87d2f8e0 R13: ffffffff87d2f7a0 R14: ffffffff87d2f7a0 R15: ffffffff87d2f7a0 FS: 00007f56a14ac700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2bc21000 CR3: 00000001abeb1000 CR4: 00000000001426f0 DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000030602 Call Trace: check_bogus_address mm/usercopy.c:153 [inline] __check_object_size+0x5d9/0x5d9 mm/usercopy.c:256 check_object_size include/linux/thread_info.h:108 [inline] check_copy_size include/linux/thread_info.h:139 [inline] copy_from_iter_full include/linux/uio.h:121 [inline] memcpy_from_msg include/linux/skbuff.h:3305 [inline] llc_ui_sendmsg+0x4b1/0x1530 net/llc/af_llc.c:941 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:639 __sys_sendto+0x3d7/0x670 net/socket.c:1789 __do_sys_sendto net/socket.c:1801 [inline] __se_sys_sendto net/socket.c:1797 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1797 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x455979 RSP: 002b:00007f56a14abc68 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f56a14ac6d4 RCX: 0000000000455979 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000018 RBP: 000000000072bea0 R08: 00000000200012c0 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000548 R14: 00000000006fbf60 R15: 0000000000000000 Code: 55 c0 e8 c0 55 bb ff ff 75 c8 48 8b 55 c0 4d 89 f9 ff 75 d0 4d 89 e8 48 89 d9 4c 89 e6 41 56 48 c7 c7 80 fa d2 87 e8 a0 0b a3 ff <0f> 0b e8 95 55 bb ff e8 c0 a8 f7 ff 8b 95 14 ff ff ff 4d 89 e8 RIP: usercopy_abort+0xbb/0xbd mm/usercopy.c:88 RSP: ffff8801868bf800 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-08 00:02:25 +08:00
rc = -EINVAL;
if (copied < 0)
goto release;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
if (!skb)
goto release;
skb->dev = llc->dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
skb_reserve(skb, hdrlen);
rc = memcpy_from_msg(skb_put(skb, copied), msg, copied);
if (rc)
goto out;
if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) {
llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
goto out;
}
if (addr->sllc_test) {
llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
goto out;
}
if (addr->sllc_xid) {
llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
goto out;
}
rc = -ENOPROTOOPT;
if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua))
goto out;
rc = llc_ui_send_data(sk, skb, noblock);
out:
if (rc) {
kfree_skb(skb);
release:
dprintk("%s: failed sending from %02X to %02X: %d\n",
__func__, llc->laddr.lsap, llc->daddr.lsap, rc);
}
release_sock(sk);
return rc ? : copied;
}
/**
* llc_ui_getname - return the address info of a socket
* @sock: Socket to get address of.
* @uaddr: Address structure to return information.
* @uaddrlen: Length of address structure.
* @peer: Does user want local or remote address information.
*
* Return the address information of a socket.
*/
static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> CC: David S. Miller <davem@davemloft.net> CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-13 03:00:20 +08:00
int peer)
{
struct sockaddr_llc sllc;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
int rc = -EBADF;
memset(&sllc, 0, sizeof(sllc));
lock_sock(sk);
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
if (peer) {
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
goto out;
if(llc->dev)
sllc.sllc_arphrd = llc->dev->type;
sllc.sllc_sap = llc->daddr.lsap;
memcpy(&sllc.sllc_mac, &llc->daddr.mac, IFHWADDRLEN);
} else {
rc = -EINVAL;
if (!llc->sap)
goto out;
sllc.sllc_sap = llc->sap->laddr.lsap;
if (llc->dev) {
sllc.sllc_arphrd = llc->dev->type;
memcpy(&sllc.sllc_mac, llc->dev->dev_addr,
IFHWADDRLEN);
}
}
sllc.sllc_family = AF_LLC;
memcpy(uaddr, &sllc, sizeof(sllc));
net: make getname() functions return length rather than use int* parameter Changes since v1: Added changes in these files: drivers/infiniband/hw/usnic/usnic_transport.c drivers/staging/lustre/lnet/lnet/lib-socket.c drivers/target/iscsi/iscsi_target_login.c drivers/vhost/net.c fs/dlm/lowcomms.c fs/ocfs2/cluster/tcp.c security/tomoyo/network.c Before: All these functions either return a negative error indicator, or store length of sockaddr into "int *socklen" parameter and return zero on success. "int *socklen" parameter is awkward. For example, if caller does not care, it still needs to provide on-stack storage for the value it does not need. None of the many FOO_getname() functions of various protocols ever used old value of *socklen. They always just overwrite it. This change drops this parameter, and makes all these functions, on success, return length of sockaddr. It's always >= 0 and can be differentiated from an error. Tests in callers are changed from "if (err)" to "if (err < 0)", where needed. rpc_sockname() lost "int buflen" parameter, since its only use was to be passed to kernel_getsockname() as &buflen and subsequently not used in any way. Userspace API is not changed. text data bss dec hex filename 30108430 2633624 873672 33615726 200ef6e vmlinux.before.o 30108109 2633612 873672 33615393 200ee21 vmlinux.o Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com> CC: David S. Miller <davem@davemloft.net> CC: linux-kernel@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-decnet-user@lists.sourceforge.net CC: linux-wireless@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: linux-sctp@vger.kernel.org CC: linux-nfs@vger.kernel.org CC: linux-x25@vger.kernel.org Signed-off-by: David S. Miller <davem@davemloft.net>
2018-02-13 03:00:20 +08:00
rc = sizeof(sllc);
out:
release_sock(sk);
return rc;
}
/**
* llc_ui_ioctl - io controls for PF_LLC
* @sock: Socket to get/set info
* @cmd: command
* @arg: optional argument for cmd
*
* get/set info on llc sockets
*/
static int llc_ui_ioctl(struct socket *sock, unsigned int cmd,
unsigned long arg)
{
return -ENOIOCTLCMD;
}
/**
* llc_ui_setsockopt - set various connection specific parameters.
* @sock: Socket to set options on.
* @level: Socket level user is requesting operations on.
* @optname: Operation name.
* @optval: User provided operation data.
* @optlen: Length of optval.
*
* Set various connection specific parameters.
*/
static int llc_ui_setsockopt(struct socket *sock, int level, int optname,
char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
unsigned int opt;
int rc = -EINVAL;
lock_sock(sk);
if (unlikely(level != SOL_LLC || optlen != sizeof(int)))
goto out;
rc = get_user(opt, (int __user *)optval);
if (rc)
goto out;
rc = -EINVAL;
switch (optname) {
case LLC_OPT_RETRY:
if (opt > LLC_OPT_MAX_RETRY)
goto out;
llc->n2 = opt;
break;
case LLC_OPT_SIZE:
if (opt > LLC_OPT_MAX_SIZE)
goto out;
llc->n1 = opt;
break;
case LLC_OPT_ACK_TMR_EXP:
if (opt > LLC_OPT_MAX_ACK_TMR_EXP)
goto out;
llc->ack_timer.expire = opt * HZ;
break;
case LLC_OPT_P_TMR_EXP:
if (opt > LLC_OPT_MAX_P_TMR_EXP)
goto out;
llc->pf_cycle_timer.expire = opt * HZ;
break;
case LLC_OPT_REJ_TMR_EXP:
if (opt > LLC_OPT_MAX_REJ_TMR_EXP)
goto out;
llc->rej_sent_timer.expire = opt * HZ;
break;
case LLC_OPT_BUSY_TMR_EXP:
if (opt > LLC_OPT_MAX_BUSY_TMR_EXP)
goto out;
llc->busy_state_timer.expire = opt * HZ;
break;
case LLC_OPT_TX_WIN:
if (opt > LLC_OPT_MAX_WIN)
goto out;
llc->k = opt;
break;
case LLC_OPT_RX_WIN:
if (opt > LLC_OPT_MAX_WIN)
goto out;
llc->rw = opt;
break;
case LLC_OPT_PKTINFO:
if (opt)
llc->cmsg_flags |= LLC_CMSG_PKTINFO;
else
llc->cmsg_flags &= ~LLC_CMSG_PKTINFO;
break;
default:
rc = -ENOPROTOOPT;
goto out;
}
rc = 0;
out:
release_sock(sk);
return rc;
}
/**
* llc_ui_getsockopt - get connection specific socket info
* @sock: Socket to get information from.
* @level: Socket level user is requesting operations on.
* @optname: Operation name.
* @optval: Variable to return operation data in.
* @optlen: Length of optval.
*
* Get connection specific socket information.
*/
static int llc_ui_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
int val = 0, len = 0, rc = -EINVAL;
lock_sock(sk);
if (unlikely(level != SOL_LLC))
goto out;
rc = get_user(len, optlen);
if (rc)
goto out;
rc = -EINVAL;
if (len != sizeof(int))
goto out;
switch (optname) {
case LLC_OPT_RETRY:
val = llc->n2; break;
case LLC_OPT_SIZE:
val = llc->n1; break;
case LLC_OPT_ACK_TMR_EXP:
val = llc->ack_timer.expire / HZ; break;
case LLC_OPT_P_TMR_EXP:
val = llc->pf_cycle_timer.expire / HZ; break;
case LLC_OPT_REJ_TMR_EXP:
val = llc->rej_sent_timer.expire / HZ; break;
case LLC_OPT_BUSY_TMR_EXP:
val = llc->busy_state_timer.expire / HZ; break;
case LLC_OPT_TX_WIN:
val = llc->k; break;
case LLC_OPT_RX_WIN:
val = llc->rw; break;
case LLC_OPT_PKTINFO:
val = (llc->cmsg_flags & LLC_CMSG_PKTINFO) != 0;
break;
default:
rc = -ENOPROTOOPT;
goto out;
}
rc = 0;
if (put_user(len, optlen) || copy_to_user(optval, &val, len))
rc = -EFAULT;
out:
release_sock(sk);
return rc;
}
static const struct net_proto_family llc_ui_family_ops = {
.family = PF_LLC,
.create = llc_ui_create,
.owner = THIS_MODULE,
};
static const struct proto_ops llc_ui_ops = {
.family = PF_LLC,
.owner = THIS_MODULE,
.release = llc_ui_release,
.bind = llc_ui_bind,
.connect = llc_ui_connect,
.socketpair = sock_no_socketpair,
.accept = llc_ui_accept,
.getname = llc_ui_getname,
.poll = datagram_poll,
.ioctl = llc_ui_ioctl,
.listen = llc_ui_listen,
.shutdown = llc_ui_shutdown,
.setsockopt = llc_ui_setsockopt,
.getsockopt = llc_ui_getsockopt,
.sendmsg = llc_ui_sendmsg,
.recvmsg = llc_ui_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
static const char llc_proc_err_msg[] __initconst =
KERN_CRIT "LLC: Unable to register the proc_fs entries\n";
static const char llc_sysctl_err_msg[] __initconst =
KERN_CRIT "LLC: Unable to register the sysctl entries\n";
static const char llc_sock_err_msg[] __initconst =
KERN_CRIT "LLC: Unable to register the network family\n";
static int __init llc2_init(void)
{
int rc = proto_register(&llc_proto, 0);
if (rc != 0)
goto out;
llc_build_offset_table();
llc_station_init();
llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
rc = llc_proc_init();
if (rc != 0) {
printk(llc_proc_err_msg);
goto out_station;
}
rc = llc_sysctl_init();
if (rc) {
printk(llc_sysctl_err_msg);
goto out_proc;
}
rc = sock_register(&llc_ui_family_ops);
if (rc) {
printk(llc_sock_err_msg);
goto out_sysctl;
}
llc_add_pack(LLC_DEST_SAP, llc_sap_handler);
llc_add_pack(LLC_DEST_CONN, llc_conn_handler);
out:
return rc;
out_sysctl:
llc_sysctl_exit();
out_proc:
llc_proc_exit();
out_station:
llc_station_exit();
proto_unregister(&llc_proto);
goto out;
}
static void __exit llc2_exit(void)
{
llc_station_exit();
llc_remove_pack(LLC_DEST_SAP);
llc_remove_pack(LLC_DEST_CONN);
sock_unregister(PF_LLC);
llc_proc_exit();
llc_sysctl_exit();
proto_unregister(&llc_proto);
}
module_init(llc2_init);
module_exit(llc2_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003");
MODULE_DESCRIPTION("IEEE 802.2 PF_LLC support");
MODULE_ALIAS_NETPROTO(PF_LLC);