OpenCloudOS-Kernel/net/sctp/bind_addr.c

555 lines
14 KiB
C
Raw Normal View History

/* SCTP kernel implementation
* (C) Copyright IBM Corp. 2001, 2003
* Copyright (c) Cisco 1999,2000
* Copyright (c) Motorola 1999,2000,2001
* Copyright (c) La Monte H.P. Yarroll 2001
*
* This file is part of the SCTP kernel implementation.
*
* A collection class to handle the storage of transport addresses.
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
* the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This SCTP implementation is distributed in the hope that it
* will be useful, but WITHOUT ANY WARRANTY; without even the implied
* ************************
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU CC; see the file COPYING. If not, see
* <http://www.gnu.org/licenses/>.
*
* Please send any bug reports or fixes you make to the
* email address(es):
* lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
* Karl Knutson <karl@athena.chicago.il.us>
* Jon Grimm <jgrimm@us.ibm.com>
* Daisy Chang <daisyc@us.ibm.com>
*/
#include <linux/types.h>
/* History note for the <linux/slab.h> include below (2010-03-24 16:04:11 +08:00):
 *
 * include cleanup: Update gfp.h and slab.h includes to prepare for
 * breaking implicit slab.h inclusion from percpu.h
 *
 * percpu.h is included by sched.h and module.h and thus ends up being
 * included when building most .c files. percpu.h includes slab.h which
 * in turn includes gfp.h making everything defined by the two files
 * universally available and complicating inclusion dependencies.
 *
 * percpu.h -> slab.h dependency is about to be removed. Prepare for this
 * change by updating users of gfp and slab facilities include those
 * headers directly instead of assuming availability. As this conversion
 * needs to touch large number of source files, the following script is
 * used as the basis of conversion.
 *
 *   http://userweb.kernel.org/~tj/misc/slabh-sweep.py
 *
 * The script does the followings.
 *
 *   * Scan files for gfp and slab usages and update includes such that
 *     only the necessary includes are there. ie. if only gfp is used,
 *     gfp.h, if slab is used, slab.h.
 *
 *   * When the script inserts a new include, it looks at the include
 *     blocks and try to put the new include such that its order conforms
 *     to its surrounding. It's put in the include block which contains
 *     core kernel includes, in the same order that the rest are ordered -
 *     alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
 *     doesn't seem to be any matching order.
 *
 *   * If the script can't find a place to put a new include (mostly
 *     because the file doesn't have fitting include block), it prints out
 *     an error message indicating which .h file needs to be added to the
 *     file.
 *
 * The conversion was done in the following steps.
 *
 * 1. The initial automatic conversion of all .c files updated slightly
 *    over 4000 files, deleting around 700 includes and adding ~480 gfp.h
 *    and ~3000 slab.h inclusions. The script emitted errors for ~400
 *    files.
 *
 * 2. Each error was manually checked. Some didn't need the inclusion,
 *    some needed manual addition while adding it to implementation .h or
 *    embedding .c file was more appropriate for others. This step added
 *    inclusions to around 150 files.
 *
 * 3. The script was run again and the output was compared to the edits
 *    from #2 to make sure no file was left behind.
 *
 * 4. Several build tests were done and a couple of problems were fixed.
 *    e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs
 *    requiring slab.h to be added manually.
 *
 * 5. The script was run on all .h files but without automatically editing
 *    them as sprinkling gfp.h and slab.h inclusions around .h files could
 *    easily lead to inclusion dependency hell. Most gfp.h inclusion
 *    directives were ignored as stuff from gfp.h was usually wildly
 *    available and often used in preprocessor macros. Each slab.h
 *    inclusion directive was examined and added manually as necessary.
 *
 * 6. percpu.h was updated not to include slab.h.
 *
 * 7. Build test were done on the following configurations and failures
 *    were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
 *    distributed build env didn't work with gcov compiles) and a few more
 *    options had to be turned off depending on archs to make things build
 *    (like ipr on powerpc/64 which failed due to missing writeq).
 *
 *    * x86 and x86_64 UP and SMP allmodconfig and a custom test config.
 *    * powerpc and powerpc64 SMP allmodconfig
 *    * sparc and sparc64 SMP allmodconfig
 *    * ia64 SMP allmodconfig
 *    * s390 SMP allmodconfig
 *    * alpha SMP allmodconfig
 *    * um on x86_64 SMP allmodconfig
 *
 * 8. percpu.h modifications were reverted so that it could be applied as
 *    a separate patch and serve as bisection point.
 *
 * Given the fact that I had only a couple of failures from tests on step
 * 6, I'm fairly confident about the coverage of this conversion patch.
 * If there is a breakage, it's likely to be something in one of the arch
 * headers which should be easily discoverable easily on most builds of
 * the specific arch.
 *
 * Signed-off-by: Tejun Heo <tj@kernel.org>
 * Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
 * Cc: Ingo Molnar <mingo@redhat.com>
 * Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
 */
#include <linux/slab.h>
#include <linux/in.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/if_inet6.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
/* Forward declarations for internal helpers defined later in this file.
 *
 * sctp_copy_one_addr() copies a single address into a bind address list,
 * subject to the given scope and flags. sctp_bind_addr_clean() empties a
 * bind address list.
 */
static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
int flags);
static void sctp_bind_addr_clean(struct sctp_bind_addr *);
/* First Level Abstractions. */
/* Build 'dest' from 'src', keeping only addresses usable at 'scope'.
 *
 * Each entry of 'src' is handed to sctp_copy_one_addr(). If the request
 * was for global scope and that pass left 'dest' empty, a second pass is
 * made at link scope, on the assumption that we are sitting behind a NAT.
 *
 * Returns the last result obtained from sctp_copy_one_addr() (0 when
 * 'src' is empty). Whenever that result is non-zero, 'dest' is emptied
 * before returning.
 */
int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
			const struct sctp_bind_addr *src,
			sctp_scope_t scope, gfp_t gfp,
			int flags)
{
	struct sctp_sockaddr_entry *entry;
	int err = 0;

	/* A single port is shared by the whole list. */
	dest->port = src->port;

	/* Pass 1: take whatever fits the requested scope. */
	list_for_each_entry(entry, &src->address_list, list) {
		err = sctp_copy_one_addr(net, dest, &entry->a, scope,
					 gfp, flags);
		if (err < 0)
			goto finish;
	}

	/* Pass 2 only applies to a global request that found nothing. */
	if (scope != SCTP_SCOPE_GLOBAL || !list_empty(&dest->address_list))
		goto finish;

	list_for_each_entry(entry, &src->address_list, list) {
		err = sctp_copy_one_addr(net, dest, &entry->a,
					 SCTP_SCOPE_LINK, gfp, flags);
		if (err < 0)
			goto finish;
	}

finish:
	if (err)
		sctp_bind_addr_clean(dest);

	return err;
}
/* Duplicate the address list of 'src' into 'dest', entry for entry.
 *
 * This is needed for peel-offs and accepts: we do not want to put every
 * current system address into the new endpoint, but we do want to
 * duplicate the list of bound addresses that the older endpoint used.
 *
 * Returns 0 on success, or the negative value reported by
 * sctp_add_bind_addr() for the first entry that could not be added.
 * Entries added before the failure are left on 'dest'.
 */
int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
		       const struct sctp_bind_addr *src,
		       gfp_t gfp)
{
	struct sctp_sockaddr_entry *entry;
	int rc = 0;

	/* The port is common to all addresses. */
	dest->port = src->port;

	list_for_each_entry(entry, &src->address_list, list) {
		rc = sctp_add_bind_addr(dest, &entry->a, sizeof(entry->a),
					1, gfp);
		if (rc < 0)
			return rc;
	}

	return rc;
}
/* Prepare an SCTP_bind_addr structure (for either an endpoint or an
 * association) for use: record 'port' and start with an empty address
 * list.
 */
void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port)
{
	bp->port = port;
	INIT_LIST_HEAD(&bp->address_list);
}
/* Dispose of the address list. */
static void sctp_bind_addr_clean(struct sctp_bind_addr *bp)
{
SCTP: fix race between sctp_bind_addr_free() and sctp_bind_addr_conflict() During the sctp_close() call, we do not use rcu primitives to destroy the address list attached to the endpoint. At the same time, we do the removal of addresses from this list before attempting to remove the socket from the port hash As a result, it is possible for another process to find the socket in the port hash that is in the process of being closed. It then proceeds to traverse the address list to find the conflict, only to have that address list suddenly disappear without rcu() critical section. Fix issue by closing address list removal inside RCU critical section. Race can result in a kernel crash with general protection fault or kernel NULL pointer dereference: kernel: general protection fault: 0000 [#1] SMP kernel: RIP: 0010:[<ffffffffa02f3dde>] [<ffffffffa02f3dde>] sctp_bind_addr_conflict+0x64/0x82 [sctp] kernel: Call Trace: kernel: [<ffffffffa02f415f>] ? sctp_get_port_local+0x17b/0x2a3 [sctp] kernel: [<ffffffffa02f3d45>] ? sctp_bind_addr_match+0x33/0x68 [sctp] kernel: [<ffffffffa02f4416>] ? sctp_do_bind+0xd3/0x141 [sctp] kernel: [<ffffffffa02f5030>] ? sctp_bindx_add+0x4d/0x8e [sctp] kernel: [<ffffffffa02f5183>] ? sctp_setsockopt_bindx+0x112/0x4a4 [sctp] kernel: [<ffffffff81089e82>] ? generic_file_aio_write+0x7f/0x9b kernel: [<ffffffffa02f763e>] ? sctp_setsockopt+0x14f/0xfee [sctp] kernel: [<ffffffff810c11fb>] ? do_sync_write+0xab/0xeb kernel: [<ffffffff810e82ab>] ? fsnotify+0x239/0x282 kernel: [<ffffffff810c2462>] ? alloc_file+0x18/0xb1 kernel: [<ffffffff8134a0b1>] ? compat_sys_setsockopt+0x1a5/0x1d9 kernel: [<ffffffff8134aaf1>] ? compat_sys_socketcall+0x143/0x1a4 kernel: [<ffffffff810467dc>] ? sysenter_dispatch+0x7/0x32 Signed-off-by: Jacek Luczak <luczak.jacek@gmail.com> Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com> CC: Eric Dumazet <eric.dumazet@gmail.com> Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-05-19 17:55:13 +08:00
struct sctp_sockaddr_entry *addr, *temp;
/* Empty the bind address list. */
SCTP: fix race between sctp_bind_addr_free() and sctp_bind_addr_conflict() During the sctp_close() call, we do not use rcu primitives to destroy the address list attached to the endpoint. At the same time, we do the removal of addresses from this list before attempting to remove the socket from the port hash As a result, it is possible for another process to find the socket in the port hash that is in the process of being closed. It then proceeds to traverse the address list to find the conflict, only to have that address list suddenly disappear without rcu() critical section. Fix issue by closing address list removal inside RCU critical section. Race can result in a kernel crash with general protection fault or kernel NULL pointer dereference: kernel: general protection fault: 0000 [#1] SMP kernel: RIP: 0010:[<ffffffffa02f3dde>] [<ffffffffa02f3dde>] sctp_bind_addr_conflict+0x64/0x82 [sctp] kernel: Call Trace: kernel: [<ffffffffa02f415f>] ? sctp_get_port_local+0x17b/0x2a3 [sctp] kernel: [<ffffffffa02f3d45>] ? sctp_bind_addr_match+0x33/0x68 [sctp] kernel: [<ffffffffa02f4416>] ? sctp_do_bind+0xd3/0x141 [sctp] kernel: [<ffffffffa02f5030>] ? sctp_bindx_add+0x4d/0x8e [sctp] kernel: [<ffffffffa02f5183>] ? sctp_setsockopt_bindx+0x112/0x4a4 [sctp] kernel: [<ffffffff81089e82>] ? generic_file_aio_write+0x7f/0x9b kernel: [<ffffffffa02f763e>] ? sctp_setsockopt+0x14f/0xfee [sctp] kernel: [<ffffffff810c11fb>] ? do_sync_write+0xab/0xeb kernel: [<ffffffff810e82ab>] ? fsnotify+0x239/0x282 kernel: [<ffffffff810c2462>] ? alloc_file+0x18/0xb1 kernel: [<ffffffff8134a0b1>] ? compat_sys_setsockopt+0x1a5/0x1d9 kernel: [<ffffffff8134aaf1>] ? compat_sys_socketcall+0x143/0x1a4 kernel: [<ffffffff810467dc>] ? sysenter_dispatch+0x7/0x32 Signed-off-by: Jacek Luczak <luczak.jacek@gmail.com> Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com> CC: Eric Dumazet <eric.dumazet@gmail.com> Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2011-05-19 17:55:13 +08:00
list_for_each_entry_safe(addr, temp, &bp->address_list, list) {
list_del_rcu(&addr->list);
kfree_rcu(addr, rcu);
SCTP_DBG_OBJCNT_DEC(addr);
}
}
/* Dispose of an SCTP_bind_addr structure.
 *
 * Only the entries on the address list are released, by way of
 * sctp_bind_addr_clean(); 'bp' itself is not freed here.
 */
void sctp_bind_addr_free(struct sctp_bind_addr *bp)
{
/* Empty the bind address list. */
sctp_bind_addr_clean(bp);
}
/* Append a copy of 'new' to the address list of 'bp'.
 *
 * At most sizeof(*new) bytes are copied, fewer if 'new_size' is smaller;
 * the rest of the freshly zeroed entry stays zero. 'addr_state' is stored
 * as the entry's state and the entry is marked valid.
 *
 * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
 */
int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
		       int new_size, __u8 addr_state, gfp_t gfp)
{
	struct sctp_sockaddr_entry *addr = kzalloc(sizeof(*addr), gfp);
	size_t copy_len;

	if (!addr)
		return -ENOMEM;

	copy_len = min_t(size_t, sizeof(*new), new_size);
	memcpy(&addr->a, new, copy_len);

	/* An unset port is replaced by the list's port. The v4 view is
	 * good enough because v4 and v6 keep the port at the same offset.
	 */
	if (!addr->a.v4.sin_port)
		addr->a.v4.sin_port = htons(bp->port);

	addr->valid = 1;
	addr->state = addr_state;
	INIT_LIST_HEAD(&addr->list);

	/* Callers always hold a socket lock here, and that lock is what
	 * serialises writers of the list.
	 */
	list_add_tail_rcu(&addr->list, &bp->address_list);
	SCTP_DBG_OBJCNT_INC(addr);

	return 0;
}
/* Remove the entry of 'bp' whose address is exactly 'del_addr'.
 *
 * The matching entry is marked invalid, unlinked with list_del_rcu() and
 * handed to kfree_rcu(). Returns 0 when an entry was removed, -EINVAL
 * when no entry matched.
 */
int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
{
	struct sctp_sockaddr_entry *addr;

	/* The socket lock is held by our callers and serialises writers
	 * of the list.
	 */
	list_for_each_entry(addr, &bp->address_list, list) {
		if (!sctp_cmp_addr_exact(&addr->a, del_addr))
			continue;

		/* Exact match: retire it and stop walking the list. */
		addr->valid = 0;
		list_del_rcu(&addr->list);
		kfree_rcu(addr, rcu);
		SCTP_DBG_OBJCNT_DEC(addr);
		return 0;
	}

	return -EINVAL;
}
/* Create a network byte-order representation of all the addresses of
 * 'bp', formatted as SCTP address parameters.
 *
 * The number of bytes written is returned through 'addrs_len'. The
 * returned union holds the buffer obtained from kmalloc(); its pointer is
 * NULL, with *addrs_len set to 0, when the list holds exactly one address
 * (a lone address is not embedded at all) or when the allocation fails.
 */
union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
					 int *addrs_len,
					 gfp_t gfp)
{
	union sctp_params retval;
	union sctp_params cursor;
	union sctp_addr_param rawaddr;
	struct sctp_sockaddr_entry *entry;
	struct list_head *pos;
	struct sctp_af *af;
	int written = 0;
	int alloc_len = 0;
	int param_len;

	/* Reserve room for the largest parameter form, once per entry. */
	list_for_each(pos, &bp->address_list)
		alloc_len += sizeof(union sctp_addr_param);

	/* Don't even bother embedding an address if there is only one. */
	retval.v = NULL;
	if (alloc_len != sizeof(union sctp_addr_param))
		retval.v = kmalloc(alloc_len, gfp);

	if (retval.v) {
		cursor = retval;

		list_for_each_entry(entry, &bp->address_list, list) {
			af = sctp_get_af_specific(entry->a.v4.sin_family);
			param_len = af->to_addr_param(&entry->a, &rawaddr);
			memcpy(cursor.v, &rawaddr, param_len);
			cursor.v += param_len;
			written += param_len;
		}
	}

	*addrs_len = written;
	return retval;
}
/*
 * Create an address list out of the raw address list format (IPv4 and IPv6
 * address parameters).
 *
 * 'raw_addr_list' points at 'addrs_len' bytes of back-to-back address
 * parameters. Each one is converted with the address family's
 * from_addr_param() hook, using 'port', and appended to 'bp' in state
 * SCTP_ADDR_SRC.
 *
 * Every parameter header is validated before it is used: its length must
 * cover at least the header itself and must not run past the bytes that
 * remain. Without this a zero length would spin forever and an oversized
 * length would send the walk beyond the end of the buffer.
 * NOTE(review): the length is not compared against what the address
 * family's from_addr_param() actually reads - confirm whether that is
 * needed for the callers of this function.
 *
 * Returns 0 on success. On failure (-EINVAL for a malformed or unsupported
 * parameter, or the error from sctp_add_bind_addr()) the address list of
 * 'bp' is emptied.
 */
int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
			   int addrs_len, __u16 port, gfp_t gfp)
{
	union sctp_addr_param *rawaddr;
	struct sctp_paramhdr *param;
	union sctp_addr addr;
	struct sctp_af *af;
	int retval = 0;
	int len;

	/* Convert the raw address to standard address format */
	while (addrs_len > 0) {
		/* A parameter header must fit in what is left. */
		if (addrs_len < (int)sizeof(*param)) {
			retval = -EINVAL;
			break;
		}

		param = (struct sctp_paramhdr *)raw_addr_list;
		rawaddr = (union sctp_addr_param *)raw_addr_list;

		/* The length counts the header, so it can neither be
		 * smaller than the header nor exceed the remaining bytes.
		 */
		len = ntohs(param->length);
		if (len < (int)sizeof(*param) || len > addrs_len) {
			retval = -EINVAL;
			break;
		}

		af = sctp_get_af_specific(param_type2af(param->type));
		if (unlikely(!af)) {
			retval = -EINVAL;
			break;
		}

		af->from_addr_param(&addr, rawaddr, htons(port), 0);
		retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
					    SCTP_ADDR_SRC, gfp);
		if (retval)
			break;

		addrs_len -= len;
		raw_addr_list += len;
	}

	/* Can't finish building the list, clean up. */
	if (retval)
		sctp_bind_addr_clean(bp);

	return retval;
}
/********************************************************************
* 2nd Level Abstractions
********************************************************************/
/* Report whether 'bp' contains 'addr', wildcards allowed.
 *
 * The list is walked under rcu_read_lock(). Entries that are no longer
 * valid are ignored; the rest are compared with the socket's
 * pf->cmp_addr() hook. Returns 1 on a match, 0 otherwise.
 */
int sctp_bind_addr_match(struct sctp_bind_addr *bp,
			 const union sctp_addr *addr,
			 struct sctp_sock *opt)
{
	struct sctp_sockaddr_entry *entry;
	int found = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &bp->address_list, list) {
		if (entry->valid && opt->pf->cmp_addr(&entry->a, addr, opt)) {
			found = 1;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}
/* Report whether 'addr' conflicts with any valid address held in 'bp'.
 *
 * The comparison uses one socket's pf->cmp_addr() hook. An IPv6 socket is
 * preferred as the basis because it is usually a superset of IPv4:
 * 'bp_sp' if it is IPv6, else 'addr_sp' if that one is IPv6, else
 * 'bp_sp'. The list is walked under rcu_read_lock().
 *
 * Returns the first non-zero value produced by cmp_addr(), or 0 when
 * nothing conflicts.
 */
int sctp_bind_addr_conflict(struct sctp_bind_addr *bp,
			    const union sctp_addr *addr,
			    struct sctp_sock *bp_sp,
			    struct sctp_sock *addr_sp)
{
	struct sctp_sockaddr_entry *entry;
	struct sctp_sock *basis = bp_sp;
	int result = 0;

	/* Fall over to addr_sp only when it alone is an IPv6 socket. */
	if (sctp_opt2sk(bp_sp)->sk_family != AF_INET6 &&
	    sctp_opt2sk(addr_sp)->sk_family == AF_INET6)
		basis = addr_sp;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &bp->address_list, list) {
		if (!entry->valid)
			continue;

		result = basis->pf->cmp_addr(&entry->a, addr, basis);
		if (result)
			break;
	}
	rcu_read_unlock();

	return result;
}
/* Look up 'addr' in the bind address list and return its state.
 *
 * Valid entries are compared with the cmp_addr() hook of the address
 * family of 'addr', under rcu_read_lock(). Returns the state of the first
 * matching entry, or -1 when the family is not supported or nothing
 * matches.
 */
int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
			 const union sctp_addr *addr)
{
	struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family);
	struct sctp_sockaddr_entry *entry;
	int result = -1;

	if (unlikely(!af))
		return -1;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &bp->address_list, list) {
		if (entry->valid && af->cmp_addr(&entry->a, addr)) {
			result = entry->state;
			break;
		}
	}
	rcu_read_unlock();

	return result;
}
/* Find the first address in the bind address list that is not present in
 * the packed array 'addrs' of 'addrcnt' addresses.
 *
 * The array is packed: each element is followed directly by the next, at
 * a distance given by the sockaddr_len of its address family. Scanning the
 * array for a given list entry stops at the first element that matches or
 * whose family is unknown; the entry is returned only when the whole
 * array was scanned without stopping. Returns NULL when no such entry
 * exists.
 */
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
					const union sctp_addr *addrs,
					int addrcnt,
					struct sctp_sock *opt)
{
	struct sctp_sockaddr_entry *entry;

	/* This is only called from sctp_send_asconf_del_ip(), and the
	 * socket lock is held in that code path, so the address list
	 * can't change.
	 */
	list_for_each_entry(entry, &bp->address_list, list) {
		void *cursor = (union sctp_addr *)addrs;
		int idx = 0;

		while (idx < addrcnt) {
			union sctp_addr *candidate = cursor;
			struct sctp_af *af =
				sctp_get_af_specific(candidate->v4.sin_family);

			if (!af ||
			    opt->pf->cmp_addr(&entry->a, candidate, opt))
				break;

			cursor += af->sockaddr_len;
			idx++;
		}

		if (idx == addrcnt)
			return &entry->a;
	}

	return NULL;
}
/* Copy one source address into 'dest'.
 *
 * A wildcard address expands to the local address list through
 * sctp_copy_local_addr_list(). Any other address is added only if it is
 * in 'scope' and its type is supported by the local sock as well as the
 * remote peer, as described by 'flags'. Returns 0 when nothing had to be
 * added, otherwise the result of the helper that did the copying.
 */
static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
			      union sctp_addr *addr,
			      sctp_scope_t scope, gfp_t gfp,
			      int flags)
{
	int v4_usable;
	int v6_usable;

	if (sctp_is_any(NULL, addr))
		return sctp_copy_local_addr_list(net, dest, scope, gfp, flags);

	if (!sctp_in_scope(net, addr, scope))
		return 0;

	/* In scope; now check the address type against both ends. */
	v4_usable = (AF_INET == addr->sa.sa_family) &&
		    (flags & SCTP_ADDR4_PEERSUPP);
	v6_usable = (AF_INET6 == addr->sa.sa_family) &&
		    (flags & SCTP_ADDR6_ALLOWED) &&
		    (flags & SCTP_ADDR6_PEERSUPP);
	if (!v4_usable && !v6_usable)
		return 0;

	return sctp_add_bind_addr(dest, addr, sizeof(*addr),
				  SCTP_ADDR_SRC, gfp);
}
/* Is this a wildcard address?
 *
 * The address family is taken from 'addr'; if that is AF_UNSPEC, the
 * family of 'sk' is used instead when a socket is given. Returns 0 when
 * no handler exists for the resulting family, otherwise the answer of
 * that family's is_any() hook.
 */
int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
{
	unsigned short fam = addr->sa.sa_family;
	struct sctp_af *af;

	/* Fall back on the socket when the address does not say. */
	if (fam == AF_UNSPEC)
		fam = sk ? sk->sk_family : 0;

	af = sctp_get_af_specific(fam);

	return af ? af->is_any(addr) : 0;
}
/* Is 'addr' valid for 'scope'?
 *
 * For INIT and INIT-ACK address lists, let L be the level of the
 * requested destination address; sender and receiver SHOULD include all
 * of their addresses with level greater than or equal to L. How strictly
 * this is applied is selected by the per-net scope_policy setting.
 *
 * Returns 1 when the address may be used, 0 otherwise.
 */
int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
{
	sctp_scope_t level = sctp_scope(addr);

	/* Unusable addresses never belong to any defined scope. */
	if (level == SCTP_SCOPE_UNUSABLE)
		return 0;

	switch (net->sctp.scope_policy) {
	case SCTP_SCOPE_POLICY_DISABLE:
		/* No scoping at all. */
		return 1;
	case SCTP_SCOPE_POLICY_ENABLE:
		return level <= scope;
	case SCTP_SCOPE_POLICY_PRIVATE:
		/* Private addresses are accepted regardless of 'scope'. */
		return level <= scope || level == SCTP_SCOPE_PRIVATE;
	case SCTP_SCOPE_POLICY_LINK:
		/* Link addresses are accepted regardless of 'scope'. */
		return level <= scope || level == SCTP_SCOPE_LINK;
	default:
		return 0;
	}
}
/* Report whether the endpoint of 'sk' is bound to the wildcard address
 * only: its bind address list must hold a single entry and that entry
 * must be a wildcard according to sctp_is_any(). Returns 1 if so, 0
 * otherwise.
 */
int sctp_is_ep_boundall(struct sock *sk)
{
	struct sctp_bind_addr *bp = &sctp_sk(sk)->ep->base.bind_addr;
	struct sctp_sockaddr_entry *only;

	if (!sctp_list_single_entry(&bp->address_list))
		return 0;

	only = list_entry(bp->address_list.next,
			  struct sctp_sockaddr_entry, list);

	return sctp_is_any(sk, &only->a) ? 1 : 0;
}
/********************************************************************
* 3rd Level Abstractions
********************************************************************/
/* What is the scope of 'addr'?
 *
 * The answer comes from the scope() hook of the address family of 'addr';
 * an address whose family has no handler is SCTP_SCOPE_UNUSABLE.
 */
sctp_scope_t sctp_scope(const union sctp_addr *addr)
{
	struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family);

	return af ? af->scope((union sctp_addr *)addr) : SCTP_SCOPE_UNUSABLE;
}