linux-sg2042/arch/s390/kernel/compat_linux.c

521 lines
13 KiB
C
Raw Normal View History

/*
* S390 version
* Copyright IBM Corp. 2000
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Gerhard Tonn (ton@de.ibm.com)
* Thomas Spatzier (tspat@de.ibm.com)
*
* Conversion between 31bit and 64bit native syscalls.
*
* Heavily inspired by the 32-bit Sparc compat code which is
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
*
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/times.h>
#include <linux/smp.h>
#include <linux/sem.h>
#include <linux/msg.h>
#include <linux/shm.h>
#include <linux/uio.h>
#include <linux/quota.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
#include <linux/filter.h>
#include <linux/highmem.h>
#include <linux/highuid.h>
#include <linux/mman.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/icmpv6.h>
#include <linux/syscalls.h>
#include <linux/sysctl.h>
#include <linux/binfmts.h>
#include <linux/capability.h>
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
#include <linux/fadvise.h>
#include <linux/ipc.h>
/*
 * include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities to include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the following. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and tries to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others.
 * This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually widely available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build tests were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
 * 2010-03-24 16:04:11 +08:00
 */
#include <linux/slab.h>
#include <asm/types.h>
#include <linux/uaccess.h>
#include <net/scm.h>
#include <net/sock.h>
#include "compat_linux.h"
/*
 * For this source file, we want overflow handling.
 *
 * The generic highuid helpers are dropped and replaced by local versions
 * that work on u16 IDs:
 *  - high2lowuid()/high2lowgid(): an ID above 65535 is reported as
 *    overflowuid/overflowgid, anything else is narrowed to u16.
 *  - low2highuid()/low2highgid(): the 16-bit value (u16)-1 is widened to a
 *    full-width -1, anything else is widened unchanged.
 *
 * Every expansion is wrapped in parentheses so that it behaves as a single
 * operand; without them the conditional operator would swallow neighbouring
 * operators when a macro is used inside a larger expression.
 * Note that the argument is evaluated more than once.
 */
#undef high2lowuid
#undef high2lowgid
#undef low2highuid
#undef low2highgid
#undef SET_UID16
#undef SET_GID16
#undef NEW_TO_OLD_UID
#undef NEW_TO_OLD_GID
#undef SET_OLDSTAT_UID
#undef SET_OLDSTAT_GID
#undef SET_STAT_UID
#undef SET_STAT_GID
#define high2lowuid(uid) (((uid) > 65535) ? (u16)overflowuid : (u16)(uid))
#define high2lowgid(gid) (((gid) > 65535) ? (u16)overflowgid : (u16)(gid))
#define low2highuid(uid) (((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid))
#define low2highgid(gid) (((gid) == (u16)-1) ? (gid_t)-1 : (gid_t)(gid))
#define SET_UID16(var, uid) ((var) = high2lowuid(uid))
#define SET_GID16(var, gid) ((var) = high2lowgid(gid))
#define NEW_TO_OLD_UID(uid) high2lowuid(uid)
#define NEW_TO_OLD_GID(gid) high2lowgid(gid)
#define SET_OLDSTAT_UID(stat, uid) ((stat).st_uid = high2lowuid(uid))
#define SET_OLDSTAT_GID(stat, gid) ((stat).st_gid = high2lowgid(gid))
#define SET_STAT_UID(stat, uid) ((stat).st_uid = high2lowuid(uid))
#define SET_STAT_GID(stat, gid) ((stat).st_gid = high2lowgid(gid))
/*
 * chown with 16-bit IDs: widen both IDs through low2highuid()/low2highgid()
 * (so a 16-bit -1 becomes a full-width -1) and forward to sys_chown().
 */
COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename,
		       u16, user, u16, group)
{
	uid_t uid = low2highuid(user);
	gid_t gid = low2highgid(group);

	return sys_chown(filename, uid, gid);
}
/*
 * lchown with 16-bit IDs: widen both IDs through low2highuid()/low2highgid()
 * and forward to sys_lchown().
 */
COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *, filename,
		       u16, user, u16, group)
{
	uid_t uid = low2highuid(user);
	gid_t gid = low2highgid(group);

	return sys_lchown(filename, uid, gid);
}
/*
 * fchown with 16-bit IDs: widen both IDs through low2highuid()/low2highgid()
 * and forward to sys_fchown() for the given descriptor.
 */
COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group)
{
	uid_t uid = low2highuid(user);
	gid_t gid = low2highgid(group);

	return sys_fchown(fd, uid, gid);
}
COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid)
{
return sys_setregid(low2highgid(rgid), low2highgid(egid));
}
COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
{
return sys_setgid((gid_t)gid);
}
COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
{
return sys_setreuid(low2highuid(ruid), low2highuid(euid));
}
COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
{
return sys_setuid((uid_t)uid);
}
COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
{
return sys_setresuid(low2highuid(ruid), low2highuid(euid),
low2highuid(suid));
}
/*
 * getresuid with 16-bit results.
 *
 * The real, effective and saved UIDs of the current credentials are
 * translated into the credentials' user namespace and narrowed with
 * high2lowuid() (IDs above 65535 become overflowuid). They are then stored
 * to ruidp, euidp and suidp in that order; the first failing put_user()
 * ends the sequence and its result is returned.
 */
COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp,
		       u16 __user *, euidp, u16 __user *, suidp)
{
	const struct cred *cred = current_cred();
	u16 real, effective, saved;
	int err;

	real = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid));
	effective = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid));
	saved = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid));

	err = put_user(real, ruidp);
	if (err)
		return err;

	err = put_user(effective, euidp);
	if (err)
		return err;

	return put_user(saved, suidp);
}
COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid)
{
return sys_setresgid(low2highgid(rgid), low2highgid(egid),
low2highgid(sgid));
}
/*
 * getresgid with 16-bit results.
 *
 * The real, effective and saved GIDs of the current credentials are
 * translated into the credentials' user namespace and narrowed with
 * high2lowgid() (IDs above 65535 become overflowgid). They are then stored
 * to rgidp, egidp and sgidp in that order; the first failing put_user()
 * ends the sequence and its result is returned.
 */
COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp,
		       u16 __user *, egidp, u16 __user *, sgidp)
{
	const struct cred *cred = current_cred();
	u16 real, effective, saved;
	int err;

	real = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid));
	effective = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid));
	saved = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid));

	err = put_user(real, rgidp);
	if (err)
		return err;

	err = put_user(effective, egidp);
	if (err)
		return err;

	return put_user(saved, sgidp);
}
COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
{
return sys_setfsuid((uid_t)uid);
}
COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
{
return sys_setfsgid((gid_t)gid);
}
/*
 * Copy a supplementary group list to a user-space array of 16-bit GIDs.
 *
 * @grouplist:  user buffer that receives group_info->ngroups u16 entries
 * @group_info: group list to export; entries are read from its gid[] array
 *
 * Each kernel GID is translated into the current user namespace with
 * from_kgid_munged(). A translated GID that does not fit in 16 bits is
 * reported as overflowgid via high2lowgid() rather than being silently
 * truncated to an unrelated group number, matching the other 16-bit
 * getters in this file.
 *
 * Returns 0 on success, -EFAULT if a store to user space fails.
 */
static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
{
	struct user_namespace *user_ns = current_user_ns();
	int i;
	u16 group;
	kgid_t kgid;

	for (i = 0; i < group_info->ngroups; i++) {
		kgid = group_info->gid[i];
		group = high2lowgid(from_kgid_munged(user_ns, kgid));
		if (put_user(group, grouplist + i))
			return -EFAULT;
	}
	return 0;
}
/*
 * Fill a group list from a user-space array of 16-bit GIDs.
 *
 * @group_info: destination; group_info->ngroups entries of gid[] are written
 * @grouplist:  user buffer holding that many u16 GIDs
 *
 * Each 16-bit value is zero-extended to gid_t and mapped into a kgid_t for
 * the current user namespace with make_kgid().
 *
 * Returns 0 on success, -EFAULT if a read from user space fails, or
 * -EINVAL if a GID has no valid mapping (gid_valid() fails). On error the
 * entries processed so far have already been stored in group_info.
 */
static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist)
{
	struct user_namespace *user_ns = current_user_ns();
	int i;
	u16 group;
	kgid_t kgid;

	for (i = 0; i < group_info->ngroups; i++) {
		if (get_user(group, grouplist + i))
			return -EFAULT;

		kgid = make_kgid(user_ns, (gid_t)group);
		if (!gid_valid(kgid))
			return -EINVAL;

		group_info->gid[i] = kgid;
	}
	return 0;
}
/*
 * getgroups with 16-bit GIDs.
 *
 * Returns the number of supplementary groups of the current credentials.
 * When gidsetsize is non-zero the groups are also copied to grouplist via
 * groups16_to_user().
 *
 * Errors: -EINVAL if gidsetsize is negative, or non-zero but smaller than
 * the number of groups; -EFAULT if the copy to user space fails.
 *
 * A reference on the group_info is held for the duration of the call.
 */
COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist)
{
	const struct cred *cred = current_cred();
	int ret;

	if (gidsetsize < 0)
		return -EINVAL;

	get_group_info(cred->group_info);
	ret = cred->group_info->ngroups;
	if (gidsetsize != 0) {
		if (gidsetsize < ret)
			ret = -EINVAL;
		else if (groups16_to_user(grouplist, cred->group_info))
			ret = -EFAULT;
	}
	put_group_info(cred->group_info);

	return ret;
}
/*
 * setgroups with 16-bit GIDs.
 *
 * Allocates a group list of gidsetsize entries, fills it from grouplist via
 * groups16_from_user() and installs it with set_current_groups(). The local
 * reference on the new list is dropped on every path after allocation.
 *
 * Errors: -EPERM if may_setgroups() refuses, -EINVAL if gidsetsize (taken
 * as unsigned) exceeds NGROUPS_MAX, -ENOMEM if allocation fails, otherwise
 * whatever groups16_from_user() or set_current_groups() returns.
 */
COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist)
{
	struct group_info *new_groups;
	int err;

	if (!may_setgroups())
		return -EPERM;
	if ((unsigned)gidsetsize > NGROUPS_MAX)
		return -EINVAL;

	new_groups = groups_alloc(gidsetsize);
	if (!new_groups)
		return -ENOMEM;

	err = groups16_from_user(new_groups, grouplist);
	if (!err)
		err = set_current_groups(new_groups);

	put_group_info(new_groups);
	return err;
}
COMPAT_SYSCALL_DEFINE0(s390_getuid16)
{
return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
}
COMPAT_SYSCALL_DEFINE0(s390_geteuid16)
{
return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
}
COMPAT_SYSCALL_DEFINE0(s390_getgid16)
{
return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
}
COMPAT_SYSCALL_DEFINE0(s390_getegid16)
{
return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
}
#ifdef CONFIG_SYSVIPC
/*
 * ipc multiplexer entry point, built only with CONFIG_SYSVIPC.
 *
 * Any call number with bits set above the low 16 is rejected with -EINVAL.
 * Otherwise the arguments are forwarded to compat_sys_ipc(), with `third`
 * passed a second time as the final argument.
 */
COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
		       compat_ulong_t, third, compat_uptr_t, ptr)
{
	/* hack for backward compatibility */
	if ((call >> 16) != 0)
		return -EINVAL;

	return compat_sys_ipc(call, first, second, third, ptr, third);
}
#endif
/*
 * truncate64: the 64-bit length arrives as two 32-bit halves (high, low)
 * and is reassembled before calling sys_truncate().
 */
COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
{
	unsigned long length = ((unsigned long)high << 32) | low;

	return sys_truncate(path, length);
}
/*
 * ftruncate64: the 64-bit length arrives as two 32-bit halves (high, low)
 * and is reassembled before calling sys_ftruncate().
 */
COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
{
	unsigned long length = ((unsigned long)high << 32) | low;

	return sys_ftruncate(fd, length);
}
/*
 * pread64: the file position arrives as two 32-bit halves (high, low).
 *
 * A count that is negative when viewed as compat_ssize_t is rejected with
 * -EINVAL; otherwise the call is forwarded to sys_pread64().
 */
COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
		       compat_size_t, count, u32, high, u32, low)
{
	unsigned long pos;

	if ((compat_ssize_t)count < 0)
		return -EINVAL;

	pos = ((unsigned long)high << 32) | low;
	return sys_pread64(fd, ubuf, count, pos);
}
/*
 * pwrite64: the file position arrives as two 32-bit halves (high, low).
 *
 * A count that is negative when viewed as compat_ssize_t is rejected with
 * -EINVAL; otherwise the call is forwarded to sys_pwrite64().
 */
COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
		       compat_size_t, count, u32, high, u32, low)
{
	unsigned long pos;

	if ((compat_ssize_t)count < 0)
		return -EINVAL;

	pos = ((unsigned long)high << 32) | low;
	return sys_pwrite64(fd, ubuf, count, pos);
}
/*
 * readahead: the 64-bit offset arrives as two 32-bit halves (high, low) and
 * is reassembled before calling sys_readahead().
 */
COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
{
	unsigned long offset = ((unsigned long)high << 32) | low;

	return sys_readahead(fd, offset, count);
}
/*
 * Stat buffer layout written by cp_stat64() for the stat64 family of compat
 * syscalls in this file (s390_stat64, s390_lstat64, s390_fstat64,
 * s390_fstatat64). cp_stat64() zeroes the whole structure before filling it
 * in, so the __pad* members reach user space as zero.
 *
 * The member order and types define what user space sees; do not reorder.
 */
struct stat64_emu31 {
/* Device number, encoded with huge_encode_dev(). */
unsigned long long st_dev;
unsigned int __pad1;
#define STAT64_HAS_BROKEN_ST_INO 1
/* Inode number truncated to 32 bits; the full value is in st_ino below. */
u32 __st_ino;
unsigned int st_mode;
unsigned int st_nlink;
/* Owner IDs as translated into the current user namespace. */
u32 st_uid;
u32 st_gid;
/* Device number of the node itself, encoded with huge_encode_dev(). */
unsigned long long st_rdev;
unsigned int __pad3;
long st_size;
u32 st_blksize;
unsigned char __pad4[4];
u32 __pad5; /* future possible st_blocks high bits */
u32 st_blocks; /* Number 512-byte blocks allocated. */
/* Timestamps: only the seconds part (tv_sec), truncated to 32 bits. */
u32 st_atime;
u32 __pad6;
u32 st_mtime;
u32 __pad7;
u32 st_ctime;
u32 __pad8; /* will be high 32 bits of ctime someday */
/* Full-width inode number. */
unsigned long st_ino;
};
/*
 * Convert a struct kstat into the stat64_emu31 layout and copy it to user
 * space.
 *
 * @ubuf: user buffer receiving one struct stat64_emu31
 * @stat: kernel stat data to convert
 *
 * Owner IDs are translated into the current user namespace; block size,
 * block count and timestamps are truncated to 32 bits.
 *
 * Returns 0 on success, -EFAULT if the copy to user space fails.
 */
static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
{
	struct stat64_emu31 out;

	/* Zero the whole structure, pad members included, before filling it. */
	memset(&out, 0, sizeof(out));

	out.st_dev = huge_encode_dev(stat->dev);
	out.st_rdev = huge_encode_dev(stat->rdev);

	/* Full inode number plus a truncated 32-bit copy. */
	out.st_ino = stat->ino;
	out.__st_ino = (u32)stat->ino;

	out.st_mode = stat->mode;
	out.st_nlink = (unsigned int)stat->nlink;
	out.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
	out.st_gid = from_kgid_munged(current_user_ns(), stat->gid);

	out.st_size = stat->size;
	out.st_blksize = (u32)stat->blksize;
	out.st_blocks = (u32)stat->blocks;

	out.st_atime = (u32)stat->atime.tv_sec;
	out.st_mtime = (u32)stat->mtime.tv_sec;
	out.st_ctime = (u32)stat->ctime.tv_sec;

	if (copy_to_user(ubuf, &out, sizeof(out)))
		return -EFAULT;
	return 0;
}
/*
 * stat64: look up filename with vfs_stat() and, on success, write the result
 * to statbuf in stat64_emu31 layout. Returns the vfs_stat() error or the
 * result of cp_stat64().
 */
COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename,
		       struct stat64_emu31 __user *, statbuf)
{
	struct kstat stat;
	int err;

	err = vfs_stat(filename, &stat);
	if (err)
		return err;

	return cp_stat64(statbuf, &stat);
}
/*
 * lstat64: look up filename with vfs_lstat() and, on success, write the
 * result to statbuf in stat64_emu31 layout. Returns the vfs_lstat() error
 * or the result of cp_stat64().
 */
COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename,
		       struct stat64_emu31 __user *, statbuf)
{
	struct kstat stat;
	int err;

	err = vfs_lstat(filename, &stat);
	if (err)
		return err;

	return cp_stat64(statbuf, &stat);
}
/*
 * fstat64: stat the open descriptor fd with vfs_fstat() and, on success,
 * write the result to statbuf in stat64_emu31 layout. Returns the
 * vfs_fstat() error or the result of cp_stat64().
 */
COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd,
		       struct stat64_emu31 __user *, statbuf)
{
	struct kstat stat;
	int err;

	err = vfs_fstat(fd, &stat);
	if (err)
		return err;

	return cp_stat64(statbuf, &stat);
}
/*
 * fstatat64: stat filename relative to dfd with vfs_fstatat(), honouring
 * flag, and on success write the result to statbuf in stat64_emu31 layout.
 * Returns the vfs_fstatat() error or the result of cp_stat64().
 */
COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
		       struct stat64_emu31 __user *, statbuf, int, flag)
{
	struct kstat stat;
	int ret = vfs_fstatat(dfd, filename, &stat, flag);

	if (!ret)
		ret = cp_stat64(statbuf, &stat);
	return ret;
}
/*
 * Linux/i386 didn't use to be able to handle more than
 * 4 system call parameters, so these system calls used a memory
 * block for parameter passing.
 *
 * This is that parameter block: s390_old_mmap and s390_mmap2 copy one of
 * these from user space and pass its members, in declaration order, to
 * sys_mmap_pgoff(). Every member is a compat_ulong_t.
 */
struct mmap_arg_struct_emu31 {
compat_ulong_t addr;
compat_ulong_t len;
compat_ulong_t prot;
compat_ulong_t flags;
compat_ulong_t fd;
/*
 * s390_old_mmap: byte offset, must be page aligned, shifted right by
 * PAGE_SHIFT before use. s390_mmap2: handed to sys_mmap_pgoff() unchanged.
 */
compat_ulong_t offset;
};
/*
 * Old-style mmap: all parameters come from a user-space argument block.
 *
 * The offset in the block is a byte offset; it must be page aligned and is
 * converted to a page offset for sys_mmap_pgoff().
 *
 * Errors: -EFAULT if the block cannot be read, -EINVAL if the offset is not
 * page aligned; otherwise the result of sys_mmap_pgoff().
 */
COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
{
	struct mmap_arg_struct_emu31 params;

	if (copy_from_user(&params, arg, sizeof(params)) != 0)
		return -EFAULT;

	if ((params.offset & ~PAGE_MASK) != 0)
		return -EINVAL;

	return sys_mmap_pgoff(params.addr, params.len, params.prot,
			      params.flags, params.fd,
			      params.offset >> PAGE_SHIFT);
}
/*
 * mmap2: all parameters come from a user-space argument block, and the
 * offset in the block is passed to sys_mmap_pgoff() as is.
 *
 * Returns -EFAULT if the block cannot be read, otherwise the result of
 * sys_mmap_pgoff().
 */
COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
{
	struct mmap_arg_struct_emu31 params;

	if (copy_from_user(&params, arg, sizeof(params)) != 0)
		return -EFAULT;

	return sys_mmap_pgoff(params.addr, params.len, params.prot,
			      params.flags, params.fd, params.offset);
}
/*
 * read: a count that is negative when viewed as compat_ssize_t is rejected
 * with -EINVAL; everything else is forwarded to sys_read().
 */
COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
{
	compat_ssize_t signed_count = (compat_ssize_t)count;

	if (signed_count < 0)
		return -EINVAL;

	return sys_read(fd, buf, count);
}
/*
 * write: a count that is negative when viewed as compat_ssize_t is rejected
 * with -EINVAL; everything else is forwarded to sys_write().
 */
COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
{
	compat_ssize_t signed_count = (compat_ssize_t)count;

	if (signed_count < 0)
		return -EINVAL;

	return sys_write(fd, buf, count);
}
/*
 * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
 * These need to rewrite the advice values for POSIX_FADV_{DONTNEED,NOREUSE}
 * because the 31 bit values differ from the 64 bit values.
 *
 * s390_fadvise64: advice 4 is rewritten to POSIX_FADV_DONTNEED and advice 5
 * to POSIX_FADV_NOREUSE; other values pass through. The offset arrives as
 * two 32-bit halves (high, low).
 */
COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
{
	unsigned long offset = ((unsigned long)high << 32) | low;

	switch (advise) {
	case 4:
		advise = POSIX_FADV_DONTNEED;
		break;
	case 5:
		advise = POSIX_FADV_NOREUSE;
		break;
	default:
		break;
	}

	return sys_fadvise64(fd, offset, len, advise);
}
/*
 * Argument block for s390_fadvise64_64, which copies one of these from user
 * space and passes the members to sys_fadvise64_64() in declaration order.
 */
struct fadvise64_64_args {
int fd;
long long offset;
long long len;
/* Values 4 and 5 are rewritten by s390_fadvise64_64 before use. */
int advice;
};
/*
 * fadvise64_64: parameters come from a user-space argument block.
 *
 * Advice 4 is rewritten to POSIX_FADV_DONTNEED and advice 5 to
 * POSIX_FADV_NOREUSE; other values pass through unchanged.
 *
 * Returns -EFAULT if the block cannot be read, otherwise the result of
 * sys_fadvise64_64().
 */
COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
{
	struct fadvise64_64_args params;

	if (copy_from_user(&params, args, sizeof(params)) != 0)
		return -EFAULT;

	switch (params.advice) {
	case 4:
		params.advice = POSIX_FADV_DONTNEED;
		break;
	case 5:
		params.advice = POSIX_FADV_NOREUSE;
		break;
	default:
		break;
	}

	return sys_fadvise64_64(params.fd, params.offset, params.len,
				params.advice);
}
/*
 * sync_file_range: offset and byte count each arrive as two 32-bit halves
 * and are reassembled before calling sys_sync_file_range().
 */
COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
		       u32, nhigh, u32, nlow, unsigned int, flags)
{
	loff_t offset = ((loff_t)offhigh << 32) + offlow;
	u64 nbytes = ((u64)nhigh << 32) + nlow;

	return sys_sync_file_range(fd, offset, nbytes, flags);
}
/*
 * fallocate: offset and length each arrive as two 32-bit halves and are
 * reassembled before calling sys_fallocate().
 */
COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
		       u32, lenhigh, u32, lenlow)
{
	loff_t offset = ((loff_t)offhigh << 32) + offlow;
	u64 length = ((u64)lenhigh << 32) + lenlow;

	return sys_fallocate(fd, mode, offset, length);
}