2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* NET An implementation of the SOCKET network access protocol.
|
|
|
|
* This is the master header file for the Linux NET layer,
|
|
|
|
* or, in plain English: the networking handling part of the
|
|
|
|
* kernel.
|
|
|
|
*
|
|
|
|
* Version: @(#)net.h 1.0.3 05/25/93
|
|
|
|
*
|
|
|
|
* Authors: Orest Zborowski, <obz@Kodak.COM>
|
2005-05-06 07:16:16 +08:00
|
|
|
* Ross Biro
|
2005-04-17 06:20:36 +08:00
|
|
|
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version
|
|
|
|
* 2 of the License, or (at your option) any later version.
|
|
|
|
*/
|
|
|
|
#ifndef _LINUX_NET_H
|
|
|
|
#define _LINUX_NET_H
|
|
|
|
|
2006-04-25 21:46:09 +08:00
|
|
|
#include <linux/stringify.h>
|
2006-12-29 13:21:55 +08:00
|
|
|
#include <linux/random.h>
|
2008-08-26 22:29:22 +08:00
|
|
|
#include <linux/wait.h>
|
|
|
|
#include <linux/fcntl.h> /* For O_CLOEXEC and O_NONBLOCK */
|
2010-04-29 19:01:49 +08:00
|
|
|
#include <linux/rcupdate.h>
|
2015-10-08 07:20:35 +08:00
|
|
|
#include <linux/once.h>
|
2016-08-29 05:43:17 +08:00
|
|
|
#include <linux/fs.h>
|
2015-10-08 07:20:35 +08:00
|
|
|
|
2012-10-13 17:46:48 +08:00
|
|
|
#include <uapi/linux/net.h>
|
2008-08-26 22:29:22 +08:00
|
|
|
|
|
|
|
struct poll_table_struct;
|
|
|
|
struct pipe_inode_info;
|
|
|
|
struct inode;
|
2012-08-18 12:25:51 +08:00
|
|
|
struct file;
|
2008-08-26 22:29:22 +08:00
|
|
|
struct net;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2015-11-30 12:03:11 +08:00
|
|
|
/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located
|
|
|
|
* in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
|
2017-08-15 19:28:54 +08:00
|
|
|
* Eventually all flags will be in sk->sk_wq->flags.
|
2015-11-30 12:03:11 +08:00
|
|
|
*/
|
2015-11-30 12:03:10 +08:00
|
|
|
#define SOCKWQ_ASYNC_NOSPACE 0
|
|
|
|
#define SOCKWQ_ASYNC_WAITDATA 1
|
2005-04-17 06:20:36 +08:00
|
|
|
#define SOCK_NOSPACE 2
|
|
|
|
#define SOCK_PASSCRED 3
|
[AF_UNIX]: Datagram getpeersec
This patch implements an API whereby an application can determine the
label of its peer's Unix datagram sockets via the auxiliary data mechanism of
recvmsg.
Patch purpose:
This patch enables a security-aware application to retrieve the
security context of the peer of a Unix datagram socket. The application
can then use this security context to determine the security context for
processing on behalf of the peer who sent the packet.
Patch design and implementation:
The design and implementation is very similar to the UDP case for INET
sockets. Basically we build upon the existing Unix domain socket API for
retrieving user credentials. Linux offers the API for obtaining user
credentials via ancillary messages (i.e., out of band/control messages
that are bundled together with a normal message). To retrieve the security
context, the application first indicates to the kernel such desire by
setting the SO_PASSSEC option via setsockopt. Then the application
retrieves the security context using the auxiliary data mechanism.
An example server application for Unix datagram socket should look like this:
toggle = 1;
toggle_len = sizeof(toggle);
setsockopt(sockfd, SOL_SOCKET, SO_PASSSEC, &toggle, &toggle_len);
recvmsg(sockfd, &msg_hdr, 0);
if (msg_hdr.msg_controllen > sizeof(struct cmsghdr)) {
cmsg_hdr = CMSG_FIRSTHDR(&msg_hdr);
if (cmsg_hdr->cmsg_len <= CMSG_LEN(sizeof(scontext)) &&
cmsg_hdr->cmsg_level == SOL_SOCKET &&
cmsg_hdr->cmsg_type == SCM_SECURITY) {
memcpy(&scontext, CMSG_DATA(cmsg_hdr), sizeof(scontext));
}
}
sock_setsockopt is enhanced with a new socket option SOCK_PASSSEC to allow
a server socket to receive security context of the peer.
Testing:
We have tested the patch by setting up Unix datagram client and server
applications. We verified that the server can retrieve the security context
using the auxiliary data mechanism of recvmsg.
Signed-off-by: Catherine Zhang <cxzhang@watson.ibm.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-30 03:27:47 +08:00
|
|
|
#define SOCK_PASSSEC 4
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#ifndef ARCH_HAS_SOCKET_TYPES
|
2005-05-01 23:59:25 +08:00
|
|
|
/**
|
|
|
|
* enum sock_type - Socket types
|
|
|
|
* @SOCK_STREAM: stream (connection) socket
|
|
|
|
* @SOCK_DGRAM: datagram (conn.less) socket
|
|
|
|
* @SOCK_RAW: raw socket
|
|
|
|
* @SOCK_RDM: reliably-delivered message
|
|
|
|
* @SOCK_SEQPACKET: sequential packet socket
|
2005-11-07 17:01:05 +08:00
|
|
|
* @SOCK_DCCP: Datagram Congestion Control Protocol socket
|
2005-05-01 23:59:25 +08:00
|
|
|
* @SOCK_PACKET: linux specific way of getting packets at the dev level.
|
|
|
|
* For writing rarp and other similar things on the user level.
|
|
|
|
*
|
2005-04-17 06:20:36 +08:00
|
|
|
* When adding some new socket type please
|
|
|
|
* grep ARCH_HAS_SOCKET_TYPES include/asm-* /socket.h, at least MIPS
|
|
|
|
* overrides this enum for binary compat reasons.
|
|
|
|
*/
|
|
|
|
enum sock_type {
/* every enumerator is given an explicit value */
|
|
|
|
SOCK_STREAM = 1,
|
|
|
|
SOCK_DGRAM = 2,
|
|
|
|
SOCK_RAW = 3,
|
|
|
|
SOCK_RDM = 4,
|
|
|
|
SOCK_SEQPACKET = 5,
|
2005-08-10 11:14:34 +08:00
|
|
|
SOCK_DCCP = 6,
/* values 7 through 9 are not assigned in this enum */
|
2005-04-17 06:20:36 +08:00
|
|
|
SOCK_PACKET = 10,
|
|
|
|
};
|
|
|
|
|
|
|
|
#define SOCK_MAX (SOCK_PACKET + 1)
|
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:29:17 +08:00
|
|
|
/* Mask which covers at least up to SOCK_MAX-1. The
|
|
|
|
* remaining bits are used as flags. */
|
|
|
|
#define SOCK_TYPE_MASK 0xf
|
|
|
|
|
reintroduce accept4
Introduce a new accept4() system call. The addition of this system call
matches analogous changes in 2.6.27 (dup3(), eventfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.
The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument. Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)
SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4(). This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec(). More details here:
http://udrepper.livejournal.com/20407.html ("Secure File Descriptor Handling",
Ulrich Drepper).
The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result.)
Here's a test program. Works on x86-32. Should work on x86-64, but
I (mtk) don't have a system to hand to test with.
It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().
I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.
/* test_accept4.c
Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<mtk.manpages@gmail.com>
Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#define PORT_NUM 33333
#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
/**********************************************************************/
/* The following is what we need until glibc gets a wrapper for
accept4() */
/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif
#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif
static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");
#if USE_SOCKETCALL
long args[6];
args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;
return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}
/**********************************************************************/
static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;
printf("=======================================\n");
connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");
addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}
fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");
flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");
close(acceptfd);
close(connfd);
printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}
static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;
memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);
lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");
optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");
if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");
if (listen(lfd, 5) == -1)
die("listen");
return lfd;
}
int
main(int argc, char *argv[])
{
struct sockaddr_in conn_addr;
int lfd;
int port_num;
int passed;
passed = 1;
port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;
memset(&conn_addr, 0, sizeof(struct sockaddr_in));
conn_addr.sin_family = AF_INET;
conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
conn_addr.sin_port = htons(port_num);
lfd = create_listening_socket(port_num);
if (!do_test(lfd, &conn_addr, 0, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
passed = 0;
close(lfd);
exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-11-20 07:36:14 +08:00
|
|
|
/* Flags for socket, socketpair, accept4 */
|
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:29:17 +08:00
|
|
|
#define SOCK_CLOEXEC O_CLOEXEC
|
2008-07-24 12:29:21 +08:00
|
|
|
#ifndef SOCK_NONBLOCK
|
|
|
|
#define SOCK_NONBLOCK O_NONBLOCK
|
|
|
|
#endif
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#endif /* ARCH_HAS_SOCKET_TYPES */
|
|
|
|
|
2019-03-14 21:45:23 +08:00
|
|
|
/**
|
|
|
|
* enum sock_shutdown_cmd - Shutdown types
|
|
|
|
* @SHUT_RD: shutdown receptions
|
|
|
|
* @SHUT_WR: shutdown transmissions
|
|
|
|
* @SHUT_RDWR: shutdown receptions/transmissions
|
|
|
|
*/
|
2007-11-13 10:10:39 +08:00
|
|
|
enum sock_shutdown_cmd {
/* no explicit initialisers: the enumerators take the values 0, 1, 2 in order */
|
2013-06-02 00:23:16 +08:00
|
|
|
SHUT_RD,
|
|
|
|
SHUT_WR,
|
|
|
|
SHUT_RDWR,
|
2007-11-13 10:10:39 +08:00
|
|
|
};
|
|
|
|
|
2010-04-29 19:01:49 +08:00
|
|
|
/*
 * struct socket_wq - wait-queue state referenced from struct socket (->wq).
 *
 * Holds a wait queue head, a fasync list pointer, a flags word for the
 * SOCKWQ_* values and an rcu_head.  The rcu_head is presumably there so the
 * structure can be released after an RCU grace period - confirm against the
 * allocation/free sites.  The type is declared ____cacheline_aligned_in_smp.
 */
struct socket_wq {
|
2011-02-18 11:26:36 +08:00
|
|
|
/* Note: wait MUST be first field of socket_wq */
|
2010-04-29 19:01:49 +08:00
|
|
|
wait_queue_head_t wait;
|
|
|
|
struct fasync_struct *fasync_list;
|
2015-11-30 12:03:11 +08:00
|
|
|
unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
|
2010-04-29 19:01:49 +08:00
|
|
|
struct rcu_head rcu;
|
|
|
|
} ____cacheline_aligned_in_smp;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/**
|
|
|
|
* struct socket - general BSD socket
|
2005-05-01 23:59:25 +08:00
|
|
|
* @state: socket state (%SS_CONNECTED, etc)
|
2008-07-08 18:03:01 +08:00
|
|
|
* @type: socket type (%SOCK_STREAM, etc)
|
2015-11-30 12:03:10 +08:00
|
|
|
* @flags: socket flags (%SOCK_NOSPACE, etc)
|
2005-05-01 23:59:25 +08:00
|
|
|
* @ops: protocol specific socket operations
|
|
|
|
* @file: File back pointer for gc
|
|
|
|
* @sk: internal networking protocol agnostic socket representation
|
2010-07-01 21:18:58 +08:00
|
|
|
* @wq: wait queue for several uses
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
|
|
|
struct socket {
|
|
|
|
socket_state state;
|
2009-09-15 17:39:20 +08:00
|
|
|
|
2008-07-08 18:03:01 +08:00
|
|
|
/* socket type (SOCK_STREAM, etc), stored as a short rather than enum sock_type */
short type;
|
2009-09-15 17:39:20 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* SOCK_NOSPACE, SOCK_PASSCRED, ...; presumably used as bit indices - confirm */
unsigned long flags;
|
2010-04-29 19:01:49 +08:00
|
|
|
|
2018-07-30 15:45:07 +08:00
|
|
|
/* pointer to the separately defined struct socket_wq */
struct socket_wq *wq;
|
2009-03-16 10:59:13 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
struct file *file;
|
|
|
|
struct sock *sk;
|
2009-03-16 10:59:13 +08:00
|
|
|
/* const: the operations table is not modified through this pointer */
const struct proto_ops *ops;
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct vm_area_struct;
|
|
|
|
struct page;
|
|
|
|
struct sockaddr;
|
|
|
|
struct msghdr;
|
|
|
|
struct module;
|
2016-08-29 05:43:17 +08:00
|
|
|
struct sk_buff;
|
|
|
|
typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
|
|
|
|
unsigned int, size_t);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
 * struct proto_ops - table of callbacks for one socket implementation.
 *
 * struct socket refers to one of these through its "ops" member.  Besides
 * the callbacks, the table records a family number and the module that owns
 * it.  Most callbacks take the struct socket; set_peek_off, read_sock,
 * sendpage_locked, sendmsg_locked and set_rcvlowat take the struct sock
 * instead.  Nothing in this header says which callbacks may be left NULL;
 * check the callers before omitting one.
 */
struct proto_ops {
|
|
|
|
int family;
|
|
|
|
struct module *owner;
|
|
|
|
int (*release) (struct socket *sock);
|
|
|
|
int (*bind) (struct socket *sock,
|
|
|
|
struct sockaddr *myaddr,
|
|
|
|
int sockaddr_len);
|
|
|
|
int (*connect) (struct socket *sock,
|
|
|
|
struct sockaddr *vaddr,
|
|
|
|
int sockaddr_len, int flags);
|
|
|
|
int (*socketpair)(struct socket *sock1,
|
|
|
|
struct socket *sock2);
|
|
|
|
int (*accept) (struct socket *sock,
|
2017-03-09 16:09:05 +08:00
|
|
|
struct socket *newsock, int flags, bool kern);
|
2005-04-17 06:20:36 +08:00
|
|
|
int (*getname) (struct socket *sock,
|
|
|
|
struct sockaddr *addr,
|
2018-02-13 03:00:20 +08:00
|
|
|
int peer);
|
2017-07-03 10:22:01 +08:00
|
|
|
__poll_t (*poll) (struct file *file, struct socket *sock,
|
2005-04-17 06:20:36 +08:00
|
|
|
struct poll_table_struct *wait);
|
|
|
|
int (*ioctl) (struct socket *sock, unsigned int cmd,
|
|
|
|
unsigned long arg);
|
2010-02-01 17:44:19 +08:00
|
|
|
/* compat_ioctl is a member only when CONFIG_COMPAT is defined */
#ifdef CONFIG_COMPAT
|
2006-03-22 15:58:08 +08:00
|
|
|
int (*compat_ioctl) (struct socket *sock, unsigned int cmd,
|
|
|
|
unsigned long arg);
|
2010-02-01 17:44:19 +08:00
|
|
|
#endif
|
2019-04-18 04:51:48 +08:00
|
|
|
int (*gettstamp) (struct socket *sock, void __user *userstamp,
|
|
|
|
bool timeval, bool time32);
|
2005-04-17 06:20:36 +08:00
|
|
|
int (*listen) (struct socket *sock, int len);
|
|
|
|
int (*shutdown) (struct socket *sock, int flags);
|
|
|
|
/* setsockopt takes optlen by value; getsockopt takes a __user pointer to it */
int (*setsockopt)(struct socket *sock, int level,
|
2009-10-01 07:12:20 +08:00
|
|
|
int optname, char __user *optval, unsigned int optlen);
|
2005-04-17 06:20:36 +08:00
|
|
|
int (*getsockopt)(struct socket *sock, int level,
|
|
|
|
int optname, char __user *optval, int __user *optlen);
|
2010-02-01 17:44:19 +08:00
|
|
|
/* compat_{set,get}sockopt are members only when CONFIG_COMPAT is defined */
#ifdef CONFIG_COMPAT
|
2006-03-21 14:45:21 +08:00
|
|
|
int (*compat_setsockopt)(struct socket *sock, int level,
|
2009-10-01 07:12:20 +08:00
|
|
|
int optname, char __user *optval, unsigned int optlen);
|
2006-03-21 14:45:21 +08:00
|
|
|
int (*compat_getsockopt)(struct socket *sock, int level,
|
|
|
|
int optname, char __user *optval, int __user *optlen);
|
2010-02-01 17:44:19 +08:00
|
|
|
#endif
|
2015-03-02 15:37:48 +08:00
|
|
|
int (*sendmsg) (struct socket *sock, struct msghdr *m,
|
|
|
|
size_t total_len);
|
2013-11-21 10:14:22 +08:00
|
|
|
/* Notes for implementing recvmsg:
|
|
|
|
* ===============================
|
|
|
|
* msg->msg_namelen should get updated by the recvmsg handlers
|
|
|
|
* iff msg_name != NULL. It is by default 0 to prevent
|
|
|
|
* returning uninitialized memory to user space. The recvfrom
|
|
|
|
* handlers can assume that msg.msg_name is either NULL or has
|
|
|
|
* a minimum size of sizeof(struct sockaddr_storage).
|
|
|
|
*/
|
2015-03-02 15:37:48 +08:00
|
|
|
int (*recvmsg) (struct socket *sock, struct msghdr *m,
|
|
|
|
size_t total_len, int flags);
|
2005-04-17 06:20:36 +08:00
|
|
|
int (*mmap) (struct file *file, struct socket *sock,
|
|
|
|
struct vm_area_struct * vma);
|
|
|
|
ssize_t (*sendpage) (struct socket *sock, struct page *page,
|
|
|
|
int offset, size_t size, int flags);
|
2007-11-07 15:30:13 +08:00
|
|
|
ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
|
|
|
|
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
|
2013-12-08 06:26:27 +08:00
|
|
|
/* note: takes the struct sock, unlike the callbacks above */
int (*set_peek_off)(struct sock *sk, int val);
|
2016-06-30 14:45:36 +08:00
|
|
|
int (*peek_len)(struct socket *sock);
|
2017-07-29 07:22:41 +08:00
|
|
|
|
|
|
|
|
|
|
/* The following functions are called internally by kernel with
|
|
|
|
* sock lock already held.
|
|
|
|
*/
|
2016-08-29 05:43:17 +08:00
|
|
|
/* recv_actor has the sk_read_actor_t callback type declared before this struct */
int (*read_sock)(struct sock *sk, read_descriptor_t *desc,
|
|
|
|
sk_read_actor_t recv_actor);
|
2017-07-29 07:22:41 +08:00
|
|
|
int (*sendpage_locked)(struct sock *sk, struct page *page,
|
|
|
|
int offset, size_t size, int flags);
|
|
|
|
int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
|
|
|
|
size_t size);
|
2018-04-17 01:33:35 +08:00
|
|
|
int (*set_rcvlowat)(struct sock *sk, int val);
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2009-10-29 17:59:18 +08:00
|
|
|
/*
 * DECLARE_SOCKADDR - declare @dst with type @type, initialised from @src.
 *
 * Expands to a declaration whose initialiser is a GNU statement expression:
 * __sockaddr_check_size() is invoked on sizeof(*dst) and the result is @src
 * cast to @type.  @type must therefore be a pointer type.
 *
 * @src is parenthesised so that an expression argument is cast as a whole.
 * Without the parentheses, DECLARE_SOCKADDR(T *, p, base + off) would expand
 * to (T *) base + off and scale @off by sizeof(T).
 */
#define DECLARE_SOCKADDR(type, dst, src)	\
	type dst = ({ __sockaddr_check_size(sizeof(*dst)); (type) (src); })
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * struct net_proto_family - the descriptor type accepted by sock_register().
 *
 * Carries a family number, a create() callback and the owning module.
 * create() receives the struct net, the struct socket, a protocol number and
 * a "kern" flag; presumably non-zero kern marks a kernel-internal socket
 * (compare sock_create_kern()) - confirm in the implementation.
 */
struct net_proto_family {
|
|
|
|
int family;
|
2009-11-06 14:18:14 +08:00
|
|
|
int (*create)(struct net *net, struct socket *sock,
|
|
|
|
int protocol, int kern);
|
2005-04-17 06:20:36 +08:00
|
|
|
struct module *owner;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct iovec;
|
|
|
|
struct kvec;
|
|
|
|
|
2007-11-26 20:10:50 +08:00
|
|
|
/*
 * Anonymous enum of SOCK_WAKE_* constants; with no initialisers they take
 * the values 0..3 in order.  Presumably these are the "how" argument of
 * sock_wake_async() - confirm against its definition.
 */
enum {
|
|
|
|
SOCK_WAKE_IO,
|
|
|
|
SOCK_WAKE_WAITD,
|
|
|
|
SOCK_WAKE_SPACE,
|
|
|
|
SOCK_WAKE_URG,
|
|
|
|
};
|
|
|
|
|
2015-11-30 12:03:11 +08:00
|
|
|
int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
|
2013-09-27 05:48:15 +08:00
|
|
|
int sock_register(const struct net_proto_family *fam);
|
|
|
|
void sock_unregister(int family);
|
sock_diag: request _diag module only when the family or proto has been registered
Now when using 'ss' in iproute, kernel would try to load all _diag
modules, which also causes corresponding family and proto modules
to be loaded as well due to module dependencies.
Like after running 'ss', sctp, dccp, af_packet (if it works as a module)
would be loaded.
For example:
$ lsmod|grep sctp
$ ss
$ lsmod|grep sctp
sctp_diag 16384 0
sctp 323584 5 sctp_diag
inet_diag 24576 4 raw_diag,tcp_diag,sctp_diag,udp_diag
libcrc32c 16384 3 nf_conntrack,nf_nat,sctp
As these family and proto modules are loaded unintentionally, it
could cause some problems, like:
- Some debug tools use 'ss' to collect the socket info, which loads all
those diag and family and protocol modules. It's noisy for identifying
issues.
- Users usually expect to drop sctp init packet silently when they
have no sense of sctp protocol instead of sending abort back.
- It wastes resources (especially with multiple netns), and SCTP module
can't be unloaded once it's loaded.
...
In short, it's really inappropriate to have these family and proto
modules loaded unexpectedly when just doing debugging with inet_diag.
This patch is to introduce sock_load_diag_module() where it loads
the _diag module only when its corresponding family or proto has
been already registered.
Note that we can't just load _diag module without the family or
proto loaded, as some symbols used in _diag module are from the
family or proto module.
v1->v2:
- move inet proto check to inet_diag to avoid a compiling err.
v2->v3:
- define sock_load_diag_module in sock.c and export one symbol
only.
- improve the changelog.
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Phil Sutter <phil@nwl.cc>
Acked-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-10 18:57:50 +08:00
|
|
|
bool sock_is_registered(int family);
|
2013-09-27 05:48:15 +08:00
|
|
|
int __sock_create(struct net *net, int family, int type, int proto,
|
|
|
|
struct socket **res, int kern);
|
|
|
|
int sock_create(int family, int type, int proto, struct socket **res);
|
2015-05-09 10:08:05 +08:00
|
|
|
int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
|
2013-09-27 05:48:15 +08:00
|
|
|
int sock_create_lite(int family, int type, int proto, struct socket **res);
|
2016-03-08 06:11:01 +08:00
|
|
|
struct socket *sock_alloc(void);
|
2013-09-27 05:48:15 +08:00
|
|
|
void sock_release(struct socket *sock);
|
2014-12-11 13:02:50 +08:00
|
|
|
int sock_sendmsg(struct socket *sock, struct msghdr *msg);
|
2015-03-15 09:13:46 +08:00
|
|
|
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags);
|
2013-09-27 05:48:15 +08:00
|
|
|
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
|
|
|
|
struct socket *sockfd_lookup(int fd, int *err);
|
|
|
|
struct socket *sock_from_file(struct file *file, int *err);
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * sockfd_put - call fput() on the ->file member of @sock.
 * Presumably the counterpart of sockfd_lookup() - confirm at the call sites.
 * @sock is parenthesised so that any pointer expression (e.g. socks + 1)
 * can be passed, not just a plain identifier.
 */
#define sockfd_put(sock) fput((sock)->file)
|
2013-09-27 05:48:15 +08:00
|
|
|
int net_ratelimit(void);
|
2006-10-17 15:09:42 +08:00
|
|
|
|
2012-05-14 05:56:25 +08:00
|
|
|
/*
 * net_ratelimited_function - call @function with the remaining arguments
 * only when net_ratelimit() returns non-zero.  The do { } while (0) wrapper
 * makes the expansion a single statement, so it is safe in an unbraced
 * if/else.  The net_*_ratelimited() macros below are built on this.
 */
#define net_ratelimited_function(function, ...) \
|
|
|
|
do { \
|
|
|
|
if (net_ratelimit()) \
|
|
|
|
function(__VA_ARGS__); \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
#define net_emerg_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_emerg, fmt, ##__VA_ARGS__)
|
|
|
|
#define net_alert_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_alert, fmt, ##__VA_ARGS__)
|
|
|
|
#define net_crit_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_crit, fmt, ##__VA_ARGS__)
|
|
|
|
#define net_err_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_err, fmt, ##__VA_ARGS__)
|
|
|
|
#define net_notice_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_notice, fmt, ##__VA_ARGS__)
|
|
|
|
#define net_warn_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
|
|
|
|
#define net_info_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
|
2016-04-30 01:30:23 +08:00
|
|
|
#if defined(CONFIG_DYNAMIC_DEBUG)
|
|
|
|
#define net_dbg_ratelimited(fmt, ...) \
|
|
|
|
do { \
|
|
|
|
DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \
|
2019-03-08 08:27:25 +08:00
|
|
|
if (DYNAMIC_DEBUG_BRANCH(descriptor) && \
|
2016-04-30 01:30:23 +08:00
|
|
|
net_ratelimit()) \
|
net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG
The implementation of net_dbg_ratelimited in the CONFIG_DYNAMIC_DEBUG
case was added with 2c94b5373 ("net: Implement net_dbg_ratelimited() for
CONFIG_DYNAMIC_DEBUG case"). The implementation strategy was to take the
usual definition of the dynamic_pr_debug macro, but alter it by adding a
call to "net_ratelimit()" in the if statement. This is, in fact, the
correct approach.
However, while doing this, the author of the commit forgot to surround
fmt by pr_fmt, resulting in unprefixed log messages appearing in the
console. So, this commit adds back the pr_fmt(fmt) invocation, making
net_dbg_ratelimited properly consistent across DEBUG, no DEBUG, and
DYNAMIC_DEBUG cases, and bringing parity with the behavior of
dynamic_pr_debug as well.
Fixes: 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Tim Bingham <tbingham@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-15 17:14:53 +08:00
|
|
|
__dynamic_pr_debug(&descriptor, pr_fmt(fmt), \
|
|
|
|
##__VA_ARGS__); \
|
2016-04-30 01:30:23 +08:00
|
|
|
} while (0)
|
|
|
|
#elif defined(DEBUG)
|
2012-05-14 05:56:25 +08:00
|
|
|
#define net_dbg_ratelimited(fmt, ...) \
|
|
|
|
net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
|
net_dbg_ratelimited: turn into no-op when !DEBUG
The pr_debug family of functions turns into a no-op when -DDEBUG is not
specified, opting instead to call "no_printk", which gets compiled to a
no-op (but retains gcc's nice warnings about printf-style arguments).
The problem with net_dbg_ratelimited is that it is defined to be a
variant of net_ratelimited_function, which expands to essentially:
if (net_ratelimit())
pr_debug(fmt, ...);
When DEBUG is not defined, then this becomes,
if (net_ratelimit())
;
This seems benign, except it isn't. Firstly, there's the obvious
overhead of calling net_ratelimit needlessly, which does quite some book
keeping for the rate limiting. Given that the pr_debug and
net_dbg_ratelimited family of functions are sprinkled liberally through
performance critical code, with developers assuming they'll be compiled
out to a no-op most of the time, we certainly do not want this needless
book keeping. Secondly, and most visibly, even though no debug message
is printed when DEBUG is not defined, if there is a flood of
invocations, dmesg winds up peppered with messages such as
"net_ratelimit: 320 callbacks suppressed". This is because our
aforementioned net_ratelimit() function actually prints this text in
some circumstances. It's especially odd to see this when there isn't any
other accompanying debug message.
So, in sum, it doesn't make sense to have this function's current
behavior, and instead it should match what every other debug family of
functions in the kernel does with !DEBUG -- nothing.
This patch replaces calls to net_dbg_ratelimited when !DEBUG with
no_printk, keeping with the idiom of all the other debug print helpers.
Also, though not strictly necessary, it guards the call with an if (0)
so that all evaluation of any arguments are sure to be compiled out.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-05 00:26:19 +08:00
|
|
|
#else
|
|
|
|
#define net_dbg_ratelimited(fmt, ...) \
|
|
|
|
do { \
|
|
|
|
if (0) \
|
|
|
|
no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
|
|
|
|
} while (0)
|
|
|
|
#endif
|
2012-05-14 05:56:25 +08:00
|
|
|
|
2015-10-08 07:20:35 +08:00
|
|
|
/* Thin alias: forwards both (parenthesised) arguments to get_random_once(). */
#define net_get_random_once(buf, nbytes) \
|
|
|
|
get_random_once((buf), (nbytes))
|
2017-06-08 08:05:02 +08:00
|
|
|
#define net_get_random_once_wait(buf, nbytes) \
|
|
|
|
get_random_once_wait((buf), (nbytes))
|
2013-10-20 03:48:55 +08:00
|
|
|
|
2013-09-27 05:48:15 +08:00
|
|
|
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
|
|
|
|
size_t num, size_t len);
|
2017-07-29 07:22:41 +08:00
|
|
|
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
|
|
|
|
struct kvec *vec, size_t num, size_t len);
|
2013-09-27 05:48:15 +08:00
|
|
|
int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
|
|
|
|
size_t num, size_t len, int flags);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2013-09-27 05:48:15 +08:00
|
|
|
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen);
|
|
|
|
int kernel_listen(struct socket *sock, int backlog);
|
|
|
|
int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
|
|
|
|
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
|
|
|
|
int flags);
|
2018-02-13 03:00:20 +08:00
|
|
|
int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
|
|
|
|
int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
|
2013-09-27 05:48:15 +08:00
|
|
|
int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
|
|
|
|
int *optlen);
|
|
|
|
int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
|
|
|
|
unsigned int optlen);
|
|
|
|
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
|
|
|
|
size_t size, int flags);
|
2017-07-29 07:22:41 +08:00
|
|
|
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
|
|
|
|
size_t size, int flags);
|
2013-09-27 05:48:15 +08:00
|
|
|
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
|
2006-08-08 11:57:31 +08:00
|
|
|
|
2017-04-13 09:31:04 +08:00
|
|
|
/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
|
2017-04-06 07:50:35 +08:00
|
|
|
u32 kernel_sock_ip_overhead(struct sock *sk);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * Expands to MODULE_ALIAS() with the string "net-pf-" followed by the
 * stringified @proto.  __stringify() is used (rather than a bare #proto),
 * presumably so that a macro passed as @proto is expanded first - see
 * <linux/stringify.h>.
 */
#define MODULE_ALIAS_NETPROTO(proto) \
|
|
|
|
MODULE_ALIAS("net-pf-" __stringify(proto))
|
|
|
|
|
2005-08-10 10:40:55 +08:00
|
|
|
#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \
|
|
|
|
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto))
|
|
|
|
|
2007-10-22 07:44:04 +08:00
|
|
|
#define MODULE_ALIAS_NET_PF_PROTO_TYPE(pf, proto, type) \
|
|
|
|
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
|
|
|
|
"-type-" __stringify(type))
|
|
|
|
|
2012-05-29 17:30:40 +08:00
|
|
|
#define MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, name) \
|
|
|
|
MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
|
|
|
|
name)
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* _LINUX_NET_H */
|