2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Wrapper functions for accessing the files_struct fd array.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __LINUX_FILE_H
|
|
|
|
#define __LINUX_FILE_H
|
|
|
|
|
|
|
|
#include <asm/atomic.h>
|
|
|
|
#include <linux/posix_types.h>
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
#include <linux/spinlock.h>
|
2005-09-10 04:04:13 +08:00
|
|
|
#include <linux/rcupdate.h>
|
[PATCH] Shrinks sizeof(files_struct) and better layout
1) Reduce the size of (struct fdtable) to exactly 64 bytes on 32bits
platforms, lowering kmalloc() allocated space by 50%.
2) Reduce the size of (files_struct), using a special 32 bits (or
64bits) embedded_fd_set, instead of a 1024 bits fd_set for the
close_on_exec_init and open_fds_init fields. This saves some RAM (248
bytes per task) as most tasks don't open more than 32 files. D-Cache
footprint for such tasks is also reduced to the minimum.
3) Reduce size of allocated fdset. Currently two full pages are
allocated, that is 32768 bits on x86 for example, and way too much. The
minimum is now L1_CACHE_BYTES.
UP and SMP should benefit from this patch, because most tasks will touch
only one cache line when open()/close() stdin/stdout/stderr (0/1/2),
(next_fd, close_on_exec_init, open_fds_init, fd_array[0 .. 2] being in the
same cache line)
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-23 19:00:12 +08:00
|
|
|
#include <linux/types.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
 * The default fd array needs to be at least BITS_PER_LONG,
 * as this is the granularity returned by copy_fdset().
 *
 * NR_OPEN_DEFAULT is the element count of the fd_array embedded in
 * struct files_struct.
 */
#define NR_OPEN_DEFAULT BITS_PER_LONG
|
|
|
|
|
[PATCH] Shrinks sizeof(files_struct) and better layout
1) Reduce the size of (struct fdtable) to exactly 64 bytes on 32bits
platforms, lowering kmalloc() allocated space by 50%.
2) Reduce the size of (files_struct), using a special 32 bits (or
64bits) embedded_fd_set, instead of a 1024 bits fd_set for the
close_on_exec_init and open_fds_init fields. This saves some RAM (248
bytes per task) as most tasks don't open more than 32 files. D-Cache
footprint for such tasks is also reduced to the minimum.
3) Reduce size of allocated fdset. Currently two full pages are
allocated, that is 32768 bits on x86 for example, and way too much. The
minimum is now L1_CACHE_BYTES.
UP and SMP should benefit from this patch, because most tasks will touch
only one cache line when open()/close() stdin/stdout/stderr (0/1/2),
(next_fd, close_on_exec_init, open_fds_init, fd_array[0 .. 2] being in the
same cache line)
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-23 19:00:12 +08:00
|
|
|
/*
 * The embedded_fd_set is a small fd_set,
 * suitable for most tasks (which open <= BITS_PER_LONG files).
 *
 * It holds exactly one unsigned long of descriptor bits, so it is
 * BITS_PER_LONG bits wide (32 or 64) instead of the 1024 bits of a
 * full fd_set.  The one-element array keeps the fds_bits[] member
 * spelling of fd_set.
 */
struct embedded_fd_set {
	unsigned long fds_bits[1];
};
|
|
|
|
|
2005-09-10 04:04:10 +08:00
|
|
|
struct fdtable {
|
|
|
|
unsigned int max_fds;
|
|
|
|
struct file ** fd; /* current fd array */
|
|
|
|
fd_set *close_on_exec;
|
|
|
|
fd_set *open_fds;
|
2005-09-10 04:04:13 +08:00
|
|
|
struct rcu_head rcu;
|
|
|
|
struct fdtable *next;
|
2005-09-10 04:04:10 +08:00
|
|
|
};
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Open file table structure
|
|
|
|
*/
|
|
|
|
struct files_struct {
|
[PATCH] Shrinks sizeof(files_struct) and better layout
1) Reduce the size of (struct fdtable) to exactly 64 bytes on 32bits
platforms, lowering kmalloc() allocated space by 50%.
2) Reduce the size of (files_struct), using a special 32 bits (or
64bits) embedded_fd_set, instead of a 1024 bits fd_set for the
close_on_exec_init and open_fds_init fields. This save some ram (248
bytes per task) as most tasks dont open more than 32 files. D-Cache
footprint for such tasks is also reduced to the minimum.
3) Reduce size of allocated fdset. Currently two full pages are
allocated, that is 32768 bits on x86 for example, and way too much. The
minimum is now L1_CACHE_BYTES.
UP and SMP should benefit from this patch, because most tasks will touch
only one cache line when open()/close() stdin/stdout/stderr (0/1/2),
(next_fd, close_on_exec_init, open_fds_init, fd_array[0 .. 2] being in the
same cache line)
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-23 19:00:12 +08:00
|
|
|
/*
|
|
|
|
* read mostly part
|
|
|
|
*/
|
2005-11-14 08:06:24 +08:00
|
|
|
atomic_t count;
|
2005-09-10 04:04:13 +08:00
|
|
|
struct fdtable *fdt;
|
2005-09-10 04:04:10 +08:00
|
|
|
struct fdtable fdtab;
|
[PATCH] Shrinks sizeof(files_struct) and better layout
1) Reduce the size of (struct fdtable) to exactly 64 bytes on 32bits
platforms, lowering kmalloc() allocated space by 50%.
2) Reduce the size of (files_struct), using a special 32 bits (or
64bits) embedded_fd_set, instead of a 1024 bits fd_set for the
close_on_exec_init and open_fds_init fields. This save some ram (248
bytes per task) as most tasks dont open more than 32 files. D-Cache
footprint for such tasks is also reduced to the minimum.
3) Reduce size of allocated fdset. Currently two full pages are
allocated, that is 32768 bits on x86 for example, and way too much. The
minimum is now L1_CACHE_BYTES.
UP and SMP should benefit from this patch, because most tasks will touch
only one cache line when open()/close() stdin/stdout/stderr (0/1/2),
(next_fd, close_on_exec_init, open_fds_init, fd_array[0 .. 2] being in the
same cache line)
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-23 19:00:12 +08:00
|
|
|
/*
|
|
|
|
* written part on a separate cache line in SMP
|
|
|
|
*/
|
|
|
|
spinlock_t file_lock ____cacheline_aligned_in_smp;
|
|
|
|
int next_fd;
|
|
|
|
struct embedded_fd_set close_on_exec_init;
|
|
|
|
struct embedded_fd_set open_fds_init;
|
2005-11-14 08:06:24 +08:00
|
|
|
struct file * fd_array[NR_OPEN_DEFAULT];
|
2005-04-17 06:20:36 +08:00
|
|
|
};
|
|
|
|
|
2005-09-10 04:04:13 +08:00
|
|
|
/* Fetch the fdtable pointer (the fdt member) of @files through rcu_dereference(). */
#define files_fdtable(files) (rcu_dereference((files)->fdt))
|
2005-09-10 04:04:10 +08:00
|
|
|
|
2006-12-07 12:32:52 +08:00
|
|
|
/*
 * Defined outside this header.
 * NOTE(review): presumably the kmem_cache used for struct file objects - confirm
 * at the definition.
 */
extern struct kmem_cache *filp_cachep;
|
|
|
|
|
2008-02-14 07:03:15 +08:00
|
|
|
/*
 * File reference release entry points, implemented outside this header.
 * fput() is what fput_light() calls when a reference has to be dropped.
 */
extern void __fput(struct file *);
extern void fput(struct file *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-10-17 14:31:13 +08:00
|
|
|
struct file_operations;
|
|
|
|
struct vfsmount;
|
|
|
|
struct dentry;
|
|
|
|
extern int init_file(struct file *, struct vfsmount *mnt,
|
|
|
|
struct dentry *dentry, mode_t mode,
|
|
|
|
const struct file_operations *fop);
|
|
|
|
extern struct file *alloc_file(struct vfsmount *, struct dentry *dentry,
|
|
|
|
mode_t mode, const struct file_operations *fop);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * Drop a file reference only if one was actually taken.
 *
 * @file:        file to release
 * @fput_needed: non-zero when fput() must be called on @file; the
 *               unlikely() hint marks this as the rare case.
 *               NOTE(review): presumably the flag filled in by
 *               fget_light() - confirm against callers.
 */
static inline void fput_light(struct file *file, int fput_needed)
{
	if (unlikely(fput_needed))
		fput(file);
}
|
|
|
|
|
2008-02-14 07:03:15 +08:00
|
|
|
/*
 * Descriptor lookup and allocation helpers, implemented outside this header.
 * fget_light() additionally reports through @fput_needed.
 * NOTE(review): that value is presumably what fput_light() expects - confirm.
 */
extern struct file *fget(unsigned int fd);
extern struct file *fget_light(unsigned int fd, int *fput_needed);
extern void set_close_on_exec(unsigned int fd, int flag);
extern void put_filp(struct file *);
extern int get_unused_fd(void);
|
O_CLOEXEC for SCM_RIGHTS
Part two in the O_CLOEXEC saga: adding support for file descriptors received
through Unix domain sockets.
The patch is once again pretty minimal, it introduces a new flag for recvmsg
and passes it just like the existing MSG_CMSG_COMPAT flag. I think this bit
is not used otherwise but the networking people will know better.
This new flag is not recognized by recvfrom and recv. These functions cannot
be used for that purpose and the asymmetry this introduces is not worse than
the already existing MSG_CMSG_COMPAT situations.
The patch must be applied on top of the patch which introduced O_CLOEXEC. It has to
remove static from the new get_unused_fd_flags function, but since scm.c cannot
live in a module the function still does not have to be exported.
Here's a test program to make sure the code works. It's so much longer than
the actual patch...
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
#endif
#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC 0x40000000
#endif
int
main (int argc, char *argv[])
{
if (argc > 1)
{
int fd = atol (argv[1]);
printf ("child: fd = %d\n", fd);
if (fcntl (fd, F_GETFD) == 0 || errno != EBADF)
{
puts ("file descriptor valid in child");
return 1;
}
return 0;
}
struct sockaddr_un sun;
strcpy (sun.sun_path, "./testsocket");
sun.sun_family = AF_UNIX;
char databuf[] = "hello";
struct iovec iov[1];
iov[0].iov_base = databuf;
iov[0].iov_len = sizeof (databuf);
union
{
struct cmsghdr hdr;
char bytes[CMSG_SPACE (sizeof (int))];
} buf;
struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
.msg_control = buf.bytes,
.msg_controllen = sizeof (buf) };
struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN (sizeof (int));
msg.msg_controllen = cmsg->cmsg_len;
pid_t child = fork ();
if (child == -1)
error (1, errno, "fork");
if (child == 0)
{
int sock = socket (PF_UNIX, SOCK_STREAM, 0);
if (sock < 0)
error (1, errno, "socket");
if (bind (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0)
error (1, errno, "bind");
if (listen (sock, SOMAXCONN) < 0)
error (1, errno, "listen");
int conn = accept (sock, NULL, NULL);
if (conn == -1)
error (1, errno, "accept");
*(int *) CMSG_DATA (cmsg) = sock;
if (sendmsg (conn, &msg, MSG_NOSIGNAL) < 0)
error (1, errno, "sendmsg");
return 0;
}
/* For a test suite this should be more robust like a
barrier in shared memory. */
sleep (1);
int sock = socket (PF_UNIX, SOCK_STREAM, 0);
if (sock < 0)
error (1, errno, "socket");
if (connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0)
error (1, errno, "connect");
unlink (sun.sun_path);
*(int *) CMSG_DATA (cmsg) = -1;
if (recvmsg (sock, &msg, MSG_CMSG_CLOEXEC) < 0)
error (1, errno, "recvmsg");
int fd = *(int *) CMSG_DATA (cmsg);
if (fd == -1)
error (1, 0, "no descriptor received");
char fdname[20];
snprintf (fdname, sizeof (fdname), "%d", fd);
execl ("/proc/self/exe", argv[0], fdname, NULL);
puts ("execl failed");
return 1;
}
[akpm@linux-foundation.org: Fix fastcall inconsistency noted by Michael Buesch]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 14:40:34 +08:00
|
|
|
extern int get_unused_fd_flags(int flags);
|
2008-02-14 07:03:15 +08:00
|
|
|
extern void put_unused_fd(unsigned int fd);
|
2005-11-07 16:58:01 +08:00
|
|
|
struct kmem_cache;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
extern int expand_files(struct files_struct *, int nr);
|
2006-12-10 18:21:17 +08:00
|
|
|
extern void free_fdtable_rcu(struct rcu_head *rcu);
|
2005-09-10 04:04:13 +08:00
|
|
|
extern void __init files_defer_init(void);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-12-22 17:10:43 +08:00
|
|
|
static inline void free_fdtable(struct fdtable *fdt)
|
|
|
|
{
|
|
|
|
call_rcu(&fdt->rcu, free_fdtable_rcu);
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
|
|
|
|
{
|
|
|
|
struct file * file = NULL;
|
2005-09-10 04:04:10 +08:00
|
|
|
struct fdtable *fdt = files_fdtable(files);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2005-09-10 04:04:10 +08:00
|
|
|
if (fd < fdt->max_fds)
|
2005-09-10 04:04:13 +08:00
|
|
|
file = rcu_dereference(fdt->fd[fd]);
|
2005-04-17 06:20:36 +08:00
|
|
|
return file;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Check whether the specified fd has an open file.
 *
 * Shorthand for fcheck_files() on the current task's files
 * (current->files).
 */
#define fcheck(fd) fcheck_files(current->files, fd)
|
|
|
|
|
2008-02-14 07:03:15 +08:00
|
|
|
/*
 * Implemented outside this header.
 * NOTE(review): presumably binds @file to descriptor @fd - confirm at the definition.
 */
extern void fd_install(unsigned int fd, struct file *file);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Forward declaration: only pointers to task_struct are used here. */
struct task_struct;

/* files_struct accessors for a task, implemented outside this header. */
struct files_struct *get_files_struct(struct task_struct *);
void put_files_struct(struct files_struct *fs);
void reset_files_struct(struct task_struct *, struct files_struct *);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-12-07 12:32:50 +08:00
|
|
|
/*
 * Defined outside this header.
 * NOTE(review): presumably the kmem_cache used for struct files_struct - confirm
 * at the definition.
 */
extern struct kmem_cache *files_cachep;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* __LINUX_FILE_H */
|