From 1ecff5ef0a70e7fd32a0b9cf6ad7ac8285462697 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Jan 2019 14:15:19 +0100 Subject: [PATCH 01/54] s390: open-code s390_personality syscall Patch series "s390: rework compat wrapper generation". As promised, I gave this a go and changed the SYSCALL_DEFINEx() infrastructure to always include the wrappers for doing the 31-bit argument conversion on s390 compat mode. This does three main things: - The UID16 rework saved a lot of duplicated code, and would probably make sense by itself, but is also required as we can no longer call sys_*() functions directly after the last step. - Removing the compat_wrapper.c file is of course the main goal here, in order to remove the need to maintain the compat_wrapper.c file when new system calls get added. Unfortunately, this requires adding some complexity in syscall_wrapper.h, and trades a small reduction in source code lines for a small increase in binary size for unused wrappers. - As an added benefit, the use of syscall_wrapper.h now makes it easy to change the syscall wrappers so they no longer see all user space register contents, similar to changes done in commits fa697140f9a2 ("syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls") and 4378a7d4be30 ("arm64: implement syscall wrappers"). I leave the actual implementation of this for you, if you want to do it later. I did not test the changes at runtime, but I looked at the generated object code, which seems fine here and includes the same conversions as before. This patch(of 5): The sys_personality function is not meant to be called from other system calls. We could introduce an intermediate ksys_personality function, but it does almost nothing, so this just moves the implementation into the caller. Link: https://lore.kernel.org/lkml/20190116131527.2071570-1-arnd@arndb.de Link: https://lore.kernel.org/lkml/20190116131527.2071570-2-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sys_s390.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 31cefe0c28c0..560bdaf8a74f 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -79,12 +79,15 @@ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, SYSCALL_DEFINE1(s390_personality, unsigned int, personality) { - unsigned int ret; + unsigned int ret = current->personality; if (personality(current->personality) == PER_LINUX32 && personality(personality) == PER_LINUX) personality |= PER_LINUX32; - ret = sys_personality(personality); + + if (personality != 0xffffffff) + set_personality(personality); + if (personality(ret) == PER_LINUX32) ret &= ~PER_LINUX32; From 58fa4a410fc31afe08d0d0c6b6d8860c22ec17c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Jan 2019 14:15:20 +0100 Subject: [PATCH 02/54] ipc: introduce ksys_ipc()/compat_ksys_ipc() for s390 The sys_ipc() and compat_ksys_ipc() functions are meant to only be used from the system call table, not called by another function. Introduce ksys_*() interfaces for this purpose, as we have done for many other system calls. Link: https://lore.kernel.org/lkml/20190116131527.2071570-3-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Heiko Carstens [heiko.carstens@de.ibm.com: compile fix for !CONFIG_COMPAT] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_linux.c | 2 +- arch/s390/kernel/sys_s390.c | 4 +++- include/linux/syscalls.h | 4 ++++ ipc/syscall.c | 20 ++++++++++++++++---- kernel/sys_ni.c | 1 + 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 8ac38d51ed7d..a47f6d3c6d5b 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -296,7 +296,7 @@ COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, { if (call >> 16) /* hack for backward compatibility */ return -EINVAL; - return compat_sys_ipc(call, first, second, third, ptr, third); + return compat_ksys_ipc(call, first, second, third, ptr, third); } #endif diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index 560bdaf8a74f..fd0cbbed4d9f 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -58,6 +58,7 @@ out: return error; } +#ifdef CONFIG_SYSVIPC /* * sys_ipc() is the de-multiplexer for the SysV IPC calls. */ @@ -74,8 +75,9 @@ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, * Therefore we can call the generic variant by simply passing the * third parameter also as fifth parameter. */ - return sys_ipc(call, first, second, third, ptr, third); + return ksys_ipc(call, first, second, third, ptr, third); } +#endif /* CONFIG_SYSVIPC */ SYSCALL_DEFINE1(s390_personality, unsigned int, personality) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 257cccba3062..fb63045a0fb6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1185,6 +1185,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); ssize_t ksys_readahead(int fd, loff_t offset, size_t count); +int ksys_ipc(unsigned int call, int first, unsigned long second, + unsigned long third, void __user * ptr, long fifth); +int compat_ksys_ipc(u32 call, int first, int second, + u32 third, u32 ptr, u32 fifth); /* * The following kernel syscall equivalents are just wrappers to fs-internal diff --git a/ipc/syscall.c b/ipc/syscall.c index 1ac06e3983c0..3cf8ad703a4d 100644 --- a/ipc/syscall.c +++ b/ipc/syscall.c @@ -17,8 +17,8 @@ #include #include -SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, - unsigned long, third, void __user *, ptr, long, fifth) +int ksys_ipc(unsigned int call, int first, unsigned long second, + unsigned long third, void __user * ptr, long fifth) { int version, ret; @@ -106,6 +106,12 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, return -ENOSYS; } } + +SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr, long, fifth) +{ + return ksys_ipc(call, first, second, third, ptr, fifth); +} #endif #ifdef CONFIG_COMPAT @@ -121,8 +127,8 @@ struct compat_ipc_kludge { }; #ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC -COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, - u32, third, compat_uptr_t, ptr, u32, fifth) +int compat_ksys_ipc(u32 call, int first, int second, + u32 third, compat_uptr_t ptr, u32 fifth) { int version; u32 pad; @@ -195,5 +201,11 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, return -ENOSYS; } + +COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second, + u32, third, compat_uptr_t, ptr, u32, fifth) +{ + return compat_ksys_ipc(call, first, second, third, ptr, fifth); +} #endif #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index ab9d0e3c6d50..bc934f31ab10 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -366,6 +366,7 @@ COND_SYSCALL(kexec_file_load); /* s390 */ COND_SYSCALL(s390_pci_mmio_read); COND_SYSCALL(s390_pci_mmio_write); +COND_SYSCALL(s390_ipc); COND_SYSCALL_COMPAT(s390_ipc); /* powerpc */ From fef747bab3c09b30b82f76a391ee0ed83d2d6965 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Jan 2019 14:15:21 +0100 Subject: [PATCH 03/54] s390: use generic UID16 implementation s390 has an almost identical copy of the code in kernel/uid16.c. The problem here is that it requires calling the regular system calls, which the generic implementation handles correctly, but the internal interfaces are not declared in a global header for this. The best way forward here seems to be to just use the generic code and delete the s390 specific implementation. I keep the changes to uapi/asm/posix_types.h inside of an #ifdef check so user space does not observe any changes. As some of the system calls pass pointers, we also need wrappers in compat_wrapper.c, which I add for all calls with at least one argument. All those wrappers can be removed in a later step. Link: https://lore.kernel.org/lkml/20190116131527.2071570-4-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/uapi/asm/posix_types.h | 6 + arch/s390/kernel/compat_linux.c | 233 ----------------------- arch/s390/kernel/compat_wrapper.c | 15 ++ arch/s390/kernel/syscalls/syscall.tbl | 38 ++-- 5 files changed, 41 insertions(+), 252 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ed554b09eb3f..41cafd245bbc 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -379,6 +379,7 @@ config COMPAT select COMPAT_BINFMT_ELF if BINFMT_ELF select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION + select HAVE_UID16 depends on MULTIUSER help Select this option if you want to enable your system kernel to diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h index 2a3fc638414b..1913613e71b6 100644 --- a/arch/s390/include/uapi/asm/posix_types.h +++ b/arch/s390/include/uapi/asm/posix_types.h @@ -20,6 +20,12 @@ typedef long __kernel_ssize_t; typedef unsigned short __kernel_old_dev_t; #define __kernel_old_dev_t __kernel_old_dev_t +#ifdef __KERNEL__ +typedef unsigned short __kernel_old_uid_t; +typedef unsigned short __kernel_old_gid_t; +#define __kernel_old_uid_t __kernel_old_uid_t +#endif + #ifndef __s390x__ typedef unsigned long __kernel_ino_t; diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index a47f6d3c6d5b..f9d418d1b619 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -58,238 +57,6 @@ #include "compat_linux.h" -/* For this source file, we want overflow handling. */ - -#undef high2lowuid -#undef high2lowgid -#undef low2highuid -#undef low2highgid -#undef SET_UID16 -#undef SET_GID16 -#undef NEW_TO_OLD_UID -#undef NEW_TO_OLD_GID -#undef SET_OLDSTAT_UID -#undef SET_OLDSTAT_GID -#undef SET_STAT_UID -#undef SET_STAT_GID - -#define high2lowuid(uid) ((uid) > 65535) ? (u16)overflowuid : (u16)(uid) -#define high2lowgid(gid) ((gid) > 65535) ? (u16)overflowgid : (u16)(gid) -#define low2highuid(uid) ((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid) -#define low2highgid(gid) ((gid) == (u16)-1) ? (gid_t)-1 : (gid_t)(gid) -#define SET_UID16(var, uid) var = high2lowuid(uid) -#define SET_GID16(var, gid) var = high2lowgid(gid) -#define NEW_TO_OLD_UID(uid) high2lowuid(uid) -#define NEW_TO_OLD_GID(gid) high2lowgid(gid) -#define SET_OLDSTAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) -#define SET_OLDSTAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) -#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) -#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) - -COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename, - u16, user, u16, group) -{ - return ksys_chown(filename, low2highuid(user), low2highgid(group)); -} - -COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *, - filename, u16, user, u16, group) -{ - return ksys_lchown(filename, low2highuid(user), low2highgid(group)); -} - -COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group) -{ - return ksys_fchown(fd, low2highuid(user), low2highgid(group)); -} - -COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) -{ - return sys_setregid(low2highgid(rgid), low2highgid(egid)); -} - -COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) -{ - return sys_setgid(low2highgid(gid)); -} - -COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) -{ - return sys_setreuid(low2highuid(ruid), low2highuid(euid)); -} - -COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid) -{ - return sys_setuid(low2highuid(uid)); -} - -COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid) -{ - return sys_setresuid(low2highuid(ruid), low2highuid(euid), - low2highuid(suid)); -} - -COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp, - u16 __user *, euidp, u16 __user *, suidp) -{ - const struct cred *cred = current_cred(); - int retval; - u16 ruid, euid, suid; - - ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid)); - euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid)); - suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid)); - - if (!(retval = put_user(ruid, ruidp)) && - !(retval = put_user(euid, euidp))) - retval = put_user(suid, suidp); - - return retval; -} - -COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid) -{ - return sys_setresgid(low2highgid(rgid), low2highgid(egid), - low2highgid(sgid)); -} - -COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp, - u16 __user *, egidp, u16 __user *, sgidp) -{ - const struct cred *cred = current_cred(); - int retval; - u16 rgid, egid, sgid; - - rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid)); - egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid)); - sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid)); - - if (!(retval = put_user(rgid, rgidp)) && - !(retval = put_user(egid, egidp))) - retval = put_user(sgid, sgidp); - - return retval; -} - -COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid) -{ - return sys_setfsuid(low2highuid(uid)); -} - -COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid) -{ - return sys_setfsgid(low2highgid(gid)); -} - -static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info) -{ - struct user_namespace *user_ns = current_user_ns(); - int i; - u16 group; - kgid_t kgid; - - for (i = 0; i < group_info->ngroups; i++) { - kgid = group_info->gid[i]; - group = (u16)from_kgid_munged(user_ns, kgid); - if (put_user(group, grouplist+i)) - return -EFAULT; - } - - return 0; -} - -static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist) -{ - struct user_namespace *user_ns = current_user_ns(); - int i; - u16 group; - kgid_t kgid; - - for (i = 0; i < group_info->ngroups; i++) { - if (get_user(group, grouplist+i)) - return -EFAULT; - - kgid = make_kgid(user_ns, (gid_t)group); - if (!gid_valid(kgid)) - return -EINVAL; - - group_info->gid[i] = kgid; - } - - return 0; -} - -COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist) -{ - const struct cred *cred = current_cred(); - int i; - - if (gidsetsize < 0) - return -EINVAL; - - get_group_info(cred->group_info); - i = cred->group_info->ngroups; - if (gidsetsize) { - if (i > gidsetsize) { - i = -EINVAL; - goto out; - } - if (groups16_to_user(grouplist, cred->group_info)) { - i = -EFAULT; - goto out; - } - } -out: - put_group_info(cred->group_info); - return i; -} - -COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist) -{ - struct group_info *group_info; - int retval; - - if (!may_setgroups()) - return -EPERM; - if ((unsigned)gidsetsize > NGROUPS_MAX) - return -EINVAL; - - group_info = groups_alloc(gidsetsize); - if (!group_info) - return -ENOMEM; - retval = groups16_from_user(group_info, grouplist); - if (retval) { - put_group_info(group_info); - return retval; - } - - groups_sort(group_info); - retval = set_current_groups(group_info); - put_group_info(group_info); - - return retval; -} - -COMPAT_SYSCALL_DEFINE0(s390_getuid16) -{ - return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); -} - -COMPAT_SYSCALL_DEFINE0(s390_geteuid16) -{ - return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); -} - -COMPAT_SYSCALL_DEFINE0(s390_getgid16) -{ - return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); -} - -COMPAT_SYSCALL_DEFINE0(s390_getegid16) -{ - return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); -} - #ifdef CONFIG_SYSVIPC COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, compat_ulong_t, third, compat_uptr_t, ptr) diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 48c4ce668244..f54b7b73f316 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -184,3 +184,18 @@ COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) +COMPAT_SYSCALL_WRAP3(chown16, const char __user *, filename, u16, user, u16, group); +COMPAT_SYSCALL_WRAP3(lchown16, const char __user *, filename, u16, user, u16, group); +COMPAT_SYSCALL_WRAP3(fchown16, unsigned int, fd, u16, user, u16, group); +COMPAT_SYSCALL_WRAP2(setregid16, u16, rgid, u16, egid); +COMPAT_SYSCALL_WRAP1(setgid16, u16, gid); +COMPAT_SYSCALL_WRAP2(setreuid16, u16, ruid, u16, euid); +COMPAT_SYSCALL_WRAP1(setuid16, u16, uid); +COMPAT_SYSCALL_WRAP3(setresuid16, u16, ruid, u16, euid, u16, suid); +COMPAT_SYSCALL_WRAP3(getresuid16, u16 __user *, ruidp, u16 __user *, euidp, u16 __user *, suidp); +COMPAT_SYSCALL_WRAP3(setresgid16, u16, rgid, u16, egid, u16, sgid); +COMPAT_SYSCALL_WRAP3(getresgid16, u16 __user *, rgidp, u16 __user *, egidp, u16 __user *, sgidp); +COMPAT_SYSCALL_WRAP1(setfsuid16, u16, uid); +COMPAT_SYSCALL_WRAP1(setfsgid16, u16, gid); +COMPAT_SYSCALL_WRAP2(getgroups16, int, gidsetsize, u16 __user *, grouplist); +COMPAT_SYSCALL_WRAP2(setgroups16, int, gidsetsize, u16 __user *, grouplist); diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 022fc099b628..f878f74c42fd 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -23,13 +23,13 @@ 13 32 time - compat_sys_time 14 common mknod sys_mknod compat_sys_mknod 15 common chmod sys_chmod compat_sys_chmod -16 32 lchown - compat_sys_s390_lchown16 +16 32 lchown - compat_sys_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid 21 common mount sys_mount compat_sys_mount 22 common umount sys_oldumount compat_sys_oldumount -23 32 setuid - compat_sys_s390_setuid16 -24 32 getuid - compat_sys_s390_getuid16 +23 32 setuid - compat_sys_setuid16 +24 32 getuid - sys_getuid16 25 32 stime - compat_sys_stime 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm sys_alarm @@ -46,11 +46,11 @@ 42 common pipe sys_pipe compat_sys_pipe 43 common times sys_times compat_sys_times 45 common brk sys_brk compat_sys_brk -46 32 setgid - compat_sys_s390_setgid16 -47 32 getgid - compat_sys_s390_getgid16 +46 32 setgid - compat_sys_setgid16 +47 32 getgid - sys_getgid16 48 common signal sys_signal compat_sys_signal -49 32 geteuid - compat_sys_s390_geteuid16 -50 32 getegid - compat_sys_s390_getegid16 +49 32 geteuid - sys_geteuid16 +50 32 getegid - sys_getegid16 51 common acct sys_acct compat_sys_acct 52 common umount2 sys_umount compat_sys_umount 54 common ioctl sys_ioctl compat_sys_ioctl @@ -64,8 +64,8 @@ 65 common getpgrp sys_getpgrp sys_getpgrp 66 common setsid sys_setsid sys_setsid 67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - compat_sys_s390_setreuid16 -71 32 setregid - compat_sys_s390_setregid16 +70 32 setreuid - compat_sys_setreuid16 +71 32 setregid - compat_sys_setregid16 72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend 73 common sigpending sys_sigpending compat_sys_sigpending 74 common sethostname sys_sethostname compat_sys_sethostname @@ -74,8 +74,8 @@ 77 common getrusage sys_getrusage compat_sys_getrusage 78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday 79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - compat_sys_s390_getgroups16 -81 32 setgroups - compat_sys_s390_setgroups16 +80 32 getgroups - compat_sys_getgroups16 +81 32 setgroups - compat_sys_setgroups16 83 common symlink sys_symlink compat_sys_symlink 85 common readlink sys_readlink compat_sys_readlink 86 common uselib sys_uselib compat_sys_uselib @@ -87,7 +87,7 @@ 92 common truncate sys_truncate compat_sys_truncate 93 common ftruncate sys_ftruncate compat_sys_ftruncate 94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - compat_sys_s390_fchown16 +95 32 fchown - compat_sys_fchown16 96 common getpriority sys_getpriority sys_getpriority 97 common setpriority sys_setpriority sys_setpriority 99 common statfs sys_statfs compat_sys_statfs @@ -126,8 +126,8 @@ 135 common sysfs sys_sysfs compat_sys_sysfs 136 common personality sys_s390_personality sys_s390_personality 137 common afs_syscall - - -138 32 setfsuid - compat_sys_s390_setfsuid16 -139 32 setfsgid - compat_sys_s390_setfsgid16 +138 32 setfsuid - compat_sys_setfsuid16 +139 32 setfsgid - compat_sys_setfsgid16 140 32 _llseek - compat_sys_llseek 141 common getdents sys_getdents compat_sys_getdents 142 32 _newselect - compat_sys_select @@ -153,13 +153,13 @@ 161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval 162 common nanosleep sys_nanosleep compat_sys_nanosleep 163 common mremap sys_mremap compat_sys_mremap -164 32 setresuid - compat_sys_s390_setresuid16 -165 32 getresuid - compat_sys_s390_getresuid16 +164 32 setresuid - compat_sys_setresuid16 +165 32 getresuid - compat_sys_getresuid16 167 common query_module - - 168 common poll sys_poll compat_sys_poll 169 common nfsservctl - - -170 32 setresgid - compat_sys_s390_setresgid16 -171 32 getresgid - compat_sys_s390_getresgid16 +170 32 setresgid - compat_sys_setresgid16 +171 32 getresgid - compat_sys_getresgid16 172 common prctl sys_prctl compat_sys_prctl 173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction @@ -170,7 +170,7 @@ 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common pread64 sys_pread64 compat_sys_s390_pread64 181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - compat_sys_s390_chown16 +182 32 chown - compat_sys_chown16 183 common getcwd sys_getcwd compat_sys_getcwd 184 common capget sys_capget compat_sys_capget 185 common capset sys_capset compat_sys_capset From aa0d6e70d3b34e710a6a57a53a3096cb2e0ea99f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Jan 2019 14:15:22 +0100 Subject: [PATCH 04/54] s390: autogenerate compat syscall wrappers Any system call that takes a pointer argument on s390 requires a wrapper function to do a 31-to-64 zero-extension, these are currently generated in arch/s390/kernel/compat_wrapper.c. On arm64 and x86, we already generate similar wrappers for all system calls in the place of their definition, just for a different purpose (they load the arguments from pt_regs). We can do the same thing here, by adding an asm/syscall_wrapper.h file with a copy of all the relevant macros to override the generic version. Besides the addition of the compat entry point, these also rename the entry points with a __s390_ or __s390x_ prefix, similar to what we do on arm64 and x86. This in turn requires renaming a few things, and adding a proper ni_syscall() entry point. In order to still compile system call definitions that pass an loff_t argument, the __SC_COMPAT_CAST() macro checks for that and forces an -ENOSYS error, which was the best I could come up with. Those functions must obviously not get called from user space, but instead require hand-written compat_sys_*() handlers, which fortunately already exist. Link: https://lore.kernel.org/lkml/20190116131527.2071570-5-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Heiko Carstens [heiko.carstens@de.ibm.com: compile fix for !CONFIG_COMPAT] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/asm/syscall_wrapper.h | 135 ++++++++++++++++++++++++ arch/s390/kernel/compat_wrapper.c | 23 +--- arch/s390/kernel/entry.S | 4 +- arch/s390/kernel/sys_s390.c | 5 + 5 files changed, 147 insertions(+), 21 deletions(-) create mode 100644 arch/s390/include/asm/syscall_wrapper.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 41cafd245bbc..b6e3d0653002 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -75,6 +75,7 @@ config S390 select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h new file mode 100644 index 000000000000..5596c5c625d2 --- /dev/null +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_wrapper.h - s390 specific wrappers to syscall definitions + * + */ + +#ifndef _ASM_S390_SYSCALL_WRAPPER_H +#define _ASM_S390_SYSCALL_WRAPPER_H + +#ifdef CONFIG_COMPAT +#define __SC_COMPAT_TYPE(t, a) \ + __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a + +#define __SC_COMPAT_CAST(t, a) \ +({ \ + long __ReS = a; \ + \ + BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ + !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t) && \ + !__TYPE_IS_LL(t)); \ + if (__TYPE_IS_L(t)) \ + __ReS = (s32)a; \ + if (__TYPE_IS_UL(t)) \ + __ReS = (u32)a; \ + if (__TYPE_IS_PTR(t)) \ + __ReS = a & 0x7fffffff; \ + if (__TYPE_IS_LL(t)) \ + return -ENOSYS; \ + (t)__ReS; \ +}) + +#define __S390_SYS_STUBx(x, name, ...) \ + asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ + ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \ + asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ + { \ + long ret = __s390x_sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } + +/* + * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias + * named __s390x_sys_*() + */ +#define COMPAT_SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __s390_compat_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO); \ + asmlinkage long __s390_compat_sys_##sname(void) + +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __s390x_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \ + asmlinkage long __s390_sys_##sname(void) \ + __attribute__((alias(__stringify(__s390x_sys_##sname)))); \ + asmlinkage long __s390x_sys_##sname(void) + +#define COND_SYSCALL(name) \ + cond_syscall(__s390x_sys_##name); \ + cond_syscall(__s390_sys_##name) + +#define SYS_NI(name) \ + SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__s390_sys_##name, sys_ni_posix_timers) + +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ + asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ + __attribute__((alias(__stringify(__se_compat_sys##name)))); \ + ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * As some compat syscalls may not be implemented, we need to expand + * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in + * kernel/time/posix-stubs.c to cover this case as well. + */ +#define COND_SYSCALL_COMPAT(name) \ + cond_syscall(__s390_compat_sys_##name) + +#define COMPAT_SYS_NI(name) \ + SYSCALL_ALIAS(__s390_compat_sys_##name, sys_ni_posix_timers) + +#else /* CONFIG_COMPAT */ + +#define __S390_SYS_STUBx(x, fullname, name, ...) + +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __s390x_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__s390x_sys_##sname, ERRNO); \ + asmlinkage long __s390x_sys_##sname(void) + +#define COND_SYSCALL(name) \ + cond_syscall(__s390x_sys_##name) + +#define SYS_NI(name) \ + SYSCALL_ALIAS(__s390x_sys_##name, sys_ni_posix_timers); + +#endif /* CONFIG_COMPAT */ + +#define __SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ + asmlinkage long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ + __attribute__((alias(__stringify(__se_sys##name)))); \ + ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + __S390_SYS_STUBx(x, name, __VA_ARGS__) \ + asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ + } \ + __diag_pop(); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +#endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index f54b7b73f316..9d07cd553fc7 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -25,21 +25,6 @@ #define __SC_COMPAT_TYPE(t, a) \ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a -#define __SC_COMPAT_CAST(t, a) \ -({ \ - long __ReS = a; \ - \ - BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ - !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \ - if (__TYPE_IS_L(t)) \ - __ReS = (s32)a; \ - if (__TYPE_IS_UL(t)) \ - __ReS = (u32)a; \ - if (__TYPE_IS_PTR(t)) \ - __ReS = a & 0x7fffffff; \ - (t)__ReS; \ -}) - /* * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by * compat tasks. These wrappers will only be used for system calls where only @@ -53,11 +38,11 @@ * the regular system call wrappers. */ #define COMPAT_SYSCALL_WRAPx(x, name, ...) \ -asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ -asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ -asmlinkage long notrace compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ +asmlinkage long __s390_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ +asmlinkage long notrace __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ +asmlinkage long notrace __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ { \ - return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ + return __s390_sys##name(__MAP(x,__SC_ARGS,__VA_ARGS__)); \ } COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 39191a0feed1..583d65ef5007 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1512,7 +1512,7 @@ cleanup_critical: .quad .Lsie_skip - .Lsie_entry #endif .section .rodata, "a" -#define SYSCALL(esame,emu) .long esame +#define SYSCALL(esame,emu) .long __s390x_ ## esame .globl sys_call_table sys_call_table: #include "asm/syscall_table.h" @@ -1520,7 +1520,7 @@ sys_call_table: #ifdef CONFIG_COMPAT -#define SYSCALL(esame,emu) .long emu +#define SYSCALL(esame,emu) .long __s390_ ## emu .globl sys_call_table_emu sys_call_table_emu: #include "asm/syscall_table.h" diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index fd0cbbed4d9f..202fa73ac167 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -95,3 +95,8 @@ SYSCALL_DEFINE1(s390_personality, unsigned int, personality) return ret; } + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} From 90856087daca92bf1de820885ef4441613bf8e21 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 16 Jan 2019 14:15:23 +0100 Subject: [PATCH 05/54] s390: remove compat_wrapper.c Now that all these wrappers are automatically generated, we can remove the entire file, and instead point to the regualar syscalls like all other architectures do. The 31-bit pointer extension is now handled in the __s390_sys_*() wrappers. Link: https://lore.kernel.org/lkml/20190116131527.2071570-6-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/compat_wrapper.c | 186 ----------------- arch/s390/kernel/syscalls/syscall.tbl | 278 +++++++++++++------------- 3 files changed, 140 insertions(+), 326 deletions(-) delete mode 100644 arch/s390/kernel/compat_wrapper.c diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index e216e116a9a9..4e188a7be501 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -65,7 +65,7 @@ obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o -obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) +obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c deleted file mode 100644 index 9d07cd553fc7..000000000000 --- a/arch/s390/kernel/compat_wrapper.c +++ /dev/null @@ -1,186 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Compat system call wrappers. - * - * Copyright IBM Corp. 2014 - */ - -#include -#include -#include "entry.h" - -#define COMPAT_SYSCALL_WRAP1(name, ...) \ - COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP2(name, ...) \ - COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP3(name, ...) \ - COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP4(name, ...) \ - COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP5(name, ...) \ - COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP6(name, ...) \ - COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) - -#define __SC_COMPAT_TYPE(t, a) \ - __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a - -/* - * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by - * compat tasks. These wrappers will only be used for system calls where only - * the system call arguments need sign or zero extension or zeroing of the upper - * 33 bits of pointers. - * Note: since the wrapper function will afterwards call a system call which - * again performs zero and sign extension for all system call arguments with - * a size of less than eight bytes, these compat wrappers only touch those - * system call arguments with a size of eight bytes ((unsigned) long and - * pointers). Zero and sign extension for e.g. int parameters will be done by - * the regular system call wrappers. - */ -#define COMPAT_SYSCALL_WRAPx(x, name, ...) \ -asmlinkage long __s390_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ -asmlinkage long notrace __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ -asmlinkage long notrace __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ -{ \ - return __s390_sys##name(__MAP(x,__SC_ARGS,__VA_ARGS__)); \ -} - -COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); -COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); -COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); -COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); -COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); -COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); -COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); -COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); -COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); -COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); -COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); -COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); -COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); -COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); -COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); -COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); -COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); -COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); -COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); -COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); -COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); -COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); -COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); -COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); -COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); -COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); -COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); -COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); -COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); -COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); -COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); -COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); -COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); -COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); -COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); -COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); -COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); -COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); -COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); -COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); -COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); -COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); -COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); -COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); -COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); -COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); -COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); -COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); -COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); -COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); -COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); -COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); -COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); -COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); -COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags); -COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); -COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); -COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); -COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size); -COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size); -COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size); -COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); -COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); -COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); -COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); -COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); -COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); -COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); -COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); -COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); -COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); -COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); -COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); -COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); -COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); -COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); -COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); -COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); -COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); -COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); -COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); -COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname); -COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); -COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); -COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); -COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); -COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); -COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); -COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); -COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); -COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); -COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls); -COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); -COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); -COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); -COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); -COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); -COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); -COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags); -COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, void __user *, uargs) -COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) -COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags) -COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size); -COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length); -COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length); -COMPAT_SYSCALL_WRAP4(socketpair, int, family, int, type, int, protocol, int __user *, usockvec); -COMPAT_SYSCALL_WRAP3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen); -COMPAT_SYSCALL_WRAP3(connect, int, fd, struct sockaddr __user *, uservaddr, int, addrlen); -COMPAT_SYSCALL_WRAP4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, int __user *, upeer_addrlen, int, flags); -COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len); -COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len); -COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); -COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); -COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); -COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); -COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); -COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); -COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) -COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) -COMPAT_SYSCALL_WRAP3(chown16, const char __user *, filename, u16, user, u16, group); -COMPAT_SYSCALL_WRAP3(lchown16, const char __user *, filename, u16, user, u16, group); -COMPAT_SYSCALL_WRAP3(fchown16, unsigned int, fd, u16, user, u16, group); -COMPAT_SYSCALL_WRAP2(setregid16, u16, rgid, u16, egid); -COMPAT_SYSCALL_WRAP1(setgid16, u16, gid); -COMPAT_SYSCALL_WRAP2(setreuid16, u16, ruid, u16, euid); -COMPAT_SYSCALL_WRAP1(setuid16, u16, uid); -COMPAT_SYSCALL_WRAP3(setresuid16, u16, ruid, u16, euid, u16, suid); -COMPAT_SYSCALL_WRAP3(getresuid16, u16 __user *, ruidp, u16 __user *, euidp, u16 __user *, suidp); -COMPAT_SYSCALL_WRAP3(setresgid16, u16, rgid, u16, egid, u16, sgid); -COMPAT_SYSCALL_WRAP3(getresgid16, u16 __user *, rgidp, u16 __user *, egidp, u16 __user *, sgidp); -COMPAT_SYSCALL_WRAP1(setfsuid16, u16, uid); -COMPAT_SYSCALL_WRAP1(setfsgid16, u16, gid); -COMPAT_SYSCALL_WRAP2(getgroups16, int, gidsetsize, u16 __user *, grouplist); -COMPAT_SYSCALL_WRAP2(setgroups16, int, gidsetsize, u16 __user *, grouplist); diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index f878f74c42fd..7413fd318e2a 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -15,86 +15,86 @@ 5 common open sys_open compat_sys_open 6 common close sys_close sys_close 7 common restart_syscall sys_restart_syscall sys_restart_syscall -8 common creat sys_creat compat_sys_creat -9 common link sys_link compat_sys_link -10 common unlink sys_unlink compat_sys_unlink +8 common creat sys_creat sys_creat +9 common link sys_link sys_link +10 common unlink sys_unlink sys_unlink 11 common execve sys_execve compat_sys_execve -12 common chdir sys_chdir compat_sys_chdir +12 common chdir sys_chdir sys_chdir 13 32 time - compat_sys_time -14 common mknod sys_mknod compat_sys_mknod -15 common chmod sys_chmod compat_sys_chmod -16 32 lchown - compat_sys_lchown16 +14 common mknod sys_mknod sys_mknod +15 common chmod sys_chmod sys_chmod +16 32 lchown - sys_lchown16 19 common lseek sys_lseek compat_sys_lseek 20 common getpid sys_getpid sys_getpid 21 common mount sys_mount compat_sys_mount -22 common umount sys_oldumount compat_sys_oldumount -23 32 setuid - compat_sys_setuid16 +22 common umount sys_oldumount sys_oldumount +23 32 setuid - sys_setuid16 24 32 getuid - sys_getuid16 25 32 stime - compat_sys_stime 26 common ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm sys_alarm 29 common pause sys_pause sys_pause 30 common utime sys_utime compat_sys_utime -33 common access sys_access compat_sys_access +33 common access sys_access sys_access 34 common nice sys_nice sys_nice 36 common sync sys_sync sys_sync 37 common kill sys_kill sys_kill -38 common rename sys_rename compat_sys_rename -39 common mkdir sys_mkdir compat_sys_mkdir -40 common rmdir sys_rmdir compat_sys_rmdir +38 common rename sys_rename sys_rename +39 common mkdir sys_mkdir sys_mkdir +40 common rmdir sys_rmdir sys_rmdir 41 common dup sys_dup sys_dup -42 common pipe sys_pipe compat_sys_pipe +42 common pipe sys_pipe sys_pipe 43 common times sys_times compat_sys_times -45 common brk sys_brk compat_sys_brk -46 32 setgid - compat_sys_setgid16 +45 common brk sys_brk sys_brk +46 32 setgid - sys_setgid16 47 32 getgid - sys_getgid16 -48 common signal sys_signal compat_sys_signal +48 common signal sys_signal sys_signal 49 32 geteuid - sys_geteuid16 50 32 getegid - sys_getegid16 -51 common acct sys_acct compat_sys_acct -52 common umount2 sys_umount compat_sys_umount +51 common acct sys_acct sys_acct +52 common umount2 sys_umount sys_umount 54 common ioctl sys_ioctl compat_sys_ioctl 55 common fcntl sys_fcntl compat_sys_fcntl 57 common setpgid sys_setpgid sys_setpgid 60 common umask sys_umask sys_umask -61 common chroot sys_chroot compat_sys_chroot +61 common chroot sys_chroot sys_chroot 62 common ustat sys_ustat compat_sys_ustat 63 common dup2 sys_dup2 sys_dup2 64 common getppid sys_getppid sys_getppid 65 common getpgrp sys_getpgrp sys_getpgrp 66 common setsid sys_setsid sys_setsid 67 common sigaction sys_sigaction compat_sys_sigaction -70 32 setreuid - compat_sys_setreuid16 -71 32 setregid - compat_sys_setregid16 -72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend +70 32 setreuid - sys_setreuid16 +71 32 setregid - sys_setregid16 +72 common sigsuspend sys_sigsuspend sys_sigsuspend 73 common sigpending sys_sigpending compat_sys_sigpending -74 common sethostname sys_sethostname compat_sys_sethostname +74 common sethostname sys_sethostname sys_sethostname 75 common setrlimit sys_setrlimit compat_sys_setrlimit 76 32 getrlimit - compat_sys_old_getrlimit 77 common getrusage sys_getrusage compat_sys_getrusage 78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday 79 common settimeofday sys_settimeofday compat_sys_settimeofday -80 32 getgroups - compat_sys_getgroups16 -81 32 setgroups - compat_sys_setgroups16 -83 common symlink sys_symlink compat_sys_symlink -85 common readlink sys_readlink compat_sys_readlink -86 common uselib sys_uselib compat_sys_uselib -87 common swapon sys_swapon compat_sys_swapon -88 common reboot sys_reboot compat_sys_reboot +80 32 getgroups - sys_getgroups16 +81 32 setgroups - sys_setgroups16 +83 common symlink sys_symlink sys_symlink +85 common readlink sys_readlink sys_readlink +86 common uselib sys_uselib sys_uselib +87 common swapon sys_swapon sys_swapon +88 common reboot sys_reboot sys_reboot 89 common readdir - compat_sys_old_readdir 90 common mmap sys_old_mmap compat_sys_s390_old_mmap -91 common munmap sys_munmap compat_sys_munmap +91 common munmap sys_munmap sys_munmap 92 common truncate sys_truncate compat_sys_truncate 93 common ftruncate sys_ftruncate compat_sys_ftruncate 94 common fchmod sys_fchmod sys_fchmod -95 32 fchown - compat_sys_fchown16 +95 32 fchown - sys_fchown16 96 common getpriority sys_getpriority sys_getpriority 97 common setpriority sys_setpriority sys_setpriority 99 common statfs sys_statfs compat_sys_statfs 100 common fstatfs sys_fstatfs compat_sys_fstatfs 101 32 ioperm - - 102 common socketcall sys_socketcall compat_sys_socketcall -103 common syslog sys_syslog compat_sys_syslog +103 common syslog sys_syslog sys_syslog 104 common setitimer sys_setitimer compat_sys_setitimer 105 common getitimer sys_getitimer compat_sys_getitimer 106 common stat sys_newstat compat_sys_newstat @@ -104,63 +104,63 @@ 111 common vhangup sys_vhangup sys_vhangup 112 common idle - - 114 common wait4 sys_wait4 compat_sys_wait4 -115 common swapoff sys_swapoff compat_sys_swapoff +115 common swapoff sys_swapoff sys_swapoff 116 common sysinfo sys_sysinfo compat_sys_sysinfo 117 common ipc sys_s390_ipc compat_sys_s390_ipc 118 common fsync sys_fsync sys_fsync 119 common sigreturn sys_sigreturn compat_sys_sigreturn -120 common clone sys_clone compat_sys_clone -121 common setdomainname sys_setdomainname compat_sys_setdomainname -122 common uname sys_newuname compat_sys_newuname +120 common clone sys_clone sys_clone +121 common setdomainname sys_setdomainname sys_setdomainname +122 common uname sys_newuname sys_newuname 124 common adjtimex sys_adjtimex compat_sys_adjtimex -125 common mprotect sys_mprotect compat_sys_mprotect +125 common mprotect sys_mprotect sys_mprotect 126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask 127 common create_module - - -128 common init_module sys_init_module compat_sys_init_module -129 common delete_module sys_delete_module compat_sys_delete_module +128 common init_module sys_init_module sys_init_module +129 common delete_module sys_delete_module sys_delete_module 130 common get_kernel_syms - - -131 common quotactl sys_quotactl compat_sys_quotactl +131 common quotactl sys_quotactl sys_quotactl 132 common getpgid sys_getpgid sys_getpgid 133 common fchdir sys_fchdir sys_fchdir -134 common bdflush sys_bdflush compat_sys_bdflush -135 common sysfs sys_sysfs compat_sys_sysfs +134 common bdflush sys_bdflush sys_bdflush +135 common sysfs sys_sysfs sys_sysfs 136 common personality sys_s390_personality sys_s390_personality 137 common afs_syscall - - -138 32 setfsuid - compat_sys_setfsuid16 -139 32 setfsgid - compat_sys_setfsgid16 -140 32 _llseek - compat_sys_llseek +138 32 setfsuid - sys_setfsuid16 +139 32 setfsgid - sys_setfsgid16 +140 32 _llseek - sys_llseek 141 common getdents sys_getdents compat_sys_getdents 142 32 _newselect - compat_sys_select 142 64 select sys_select - 143 common flock sys_flock sys_flock -144 common msync sys_msync compat_sys_msync +144 common msync sys_msync sys_msync 145 common readv sys_readv compat_sys_readv 146 common writev sys_writev compat_sys_writev 147 common getsid sys_getsid sys_getsid 148 common fdatasync sys_fdatasync sys_fdatasync 149 common _sysctl sys_sysctl compat_sys_sysctl -150 common mlock sys_mlock compat_sys_mlock -151 common munlock sys_munlock compat_sys_munlock +150 common mlock sys_mlock sys_mlock +151 common munlock sys_munlock sys_munlock 152 common mlockall sys_mlockall sys_mlockall 153 common munlockall sys_munlockall sys_munlockall -154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam -155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam -156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler +154 common sched_setparam sys_sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler 157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler 158 common sched_yield sys_sched_yield sys_sched_yield 159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max 160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min 161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval 162 common nanosleep sys_nanosleep compat_sys_nanosleep -163 common mremap sys_mremap compat_sys_mremap -164 32 setresuid - compat_sys_setresuid16 -165 32 getresuid - compat_sys_getresuid16 +163 common mremap sys_mremap sys_mremap +164 32 setresuid - sys_setresuid16 +165 32 getresuid - sys_getresuid16 167 common query_module - - -168 common poll sys_poll compat_sys_poll +168 common poll sys_poll sys_poll 169 common nfsservctl - - -170 32 setresgid - compat_sys_setresgid16 -171 32 getresgid - compat_sys_getresgid16 -172 common prctl sys_prctl compat_sys_prctl +170 32 setresgid - sys_setresgid16 +171 32 getresgid - sys_getresgid16 +172 common prctl sys_prctl sys_prctl 173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn 174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask @@ -170,10 +170,10 @@ 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common pread64 sys_pread64 compat_sys_s390_pread64 181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 -182 32 chown - compat_sys_chown16 -183 common getcwd sys_getcwd compat_sys_getcwd -184 common capget sys_capget compat_sys_capget -185 common capset sys_capset compat_sys_capset +182 32 chown - sys_chown16 +183 common getcwd sys_getcwd sys_getcwd +184 common capget sys_capget sys_capget +185 common capset sys_capset sys_capset 186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack 187 common sendfile sys_sendfile64 compat_sys_sendfile 188 common getpmsg - - @@ -187,7 +187,7 @@ 195 32 stat64 - compat_sys_s390_stat64 196 32 lstat64 - compat_sys_s390_lstat64 197 32 fstat64 - compat_sys_s390_fstat64 -198 32 lchown32 - compat_sys_lchown +198 32 lchown32 - sys_lchown 198 64 lchown sys_lchown - 199 32 getuid32 - sys_getuid 199 64 getuid sys_getuid - @@ -201,21 +201,21 @@ 203 64 setreuid sys_setreuid - 204 32 setregid32 - sys_setregid 204 64 setregid sys_setregid - -205 32 getgroups32 - compat_sys_getgroups +205 32 getgroups32 - sys_getgroups 205 64 getgroups sys_getgroups - -206 32 setgroups32 - compat_sys_setgroups +206 32 setgroups32 - sys_setgroups 206 64 setgroups sys_setgroups - 207 32 fchown32 - sys_fchown 207 64 fchown sys_fchown - 208 32 setresuid32 - sys_setresuid 208 64 setresuid sys_setresuid - -209 32 getresuid32 - compat_sys_getresuid +209 32 getresuid32 - sys_getresuid 209 64 getresuid sys_getresuid - 210 32 setresgid32 - sys_setresgid 210 64 setresgid sys_setresgid - -211 32 getresgid32 - compat_sys_getresgid +211 32 getresgid32 - sys_getresgid 211 64 getresgid sys_getresgid - -212 32 chown32 - compat_sys_chown +212 32 chown32 - sys_chown 212 64 chown sys_chown - 213 32 setuid32 - sys_setuid 213 64 setuid sys_setuid - @@ -225,25 +225,25 @@ 215 64 setfsuid sys_setfsuid - 216 32 setfsgid32 - sys_setfsgid 216 64 setfsgid sys_setfsgid - -217 common pivot_root sys_pivot_root compat_sys_pivot_root -218 common mincore sys_mincore compat_sys_mincore -219 common madvise sys_madvise compat_sys_madvise -220 common getdents64 sys_getdents64 compat_sys_getdents64 +217 common pivot_root sys_pivot_root sys_pivot_root +218 common mincore sys_mincore sys_mincore +219 common madvise sys_madvise sys_madvise +220 common getdents64 sys_getdents64 sys_getdents64 221 32 fcntl64 - compat_sys_fcntl64 222 common readahead sys_readahead compat_sys_s390_readahead 223 32 sendfile64 - compat_sys_sendfile64 -224 common setxattr sys_setxattr compat_sys_setxattr -225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr -226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr -227 common getxattr sys_getxattr compat_sys_getxattr -228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr -229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr -230 common listxattr sys_listxattr compat_sys_listxattr -231 common llistxattr sys_llistxattr compat_sys_llistxattr -232 common flistxattr sys_flistxattr compat_sys_flistxattr -233 common removexattr sys_removexattr compat_sys_removexattr -234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr -235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr +224 common setxattr sys_setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr sys_listxattr +231 common llistxattr sys_llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr sys_flistxattr +233 common removexattr sys_removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr sys_lremovexattr +235 common fremovexattr sys_fremovexattr sys_fremovexattr 236 common gettid sys_gettid sys_gettid 237 common tkill sys_tkill sys_tkill 238 common futex sys_futex compat_sys_futex @@ -251,15 +251,15 @@ 240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity 241 common tgkill sys_tgkill sys_tgkill 243 common io_setup sys_io_setup compat_sys_io_setup -244 common io_destroy sys_io_destroy compat_sys_io_destroy +244 common io_destroy sys_io_destroy sys_io_destroy 245 common io_getevents sys_io_getevents compat_sys_io_getevents 246 common io_submit sys_io_submit compat_sys_io_submit -247 common io_cancel sys_io_cancel compat_sys_io_cancel +247 common io_cancel sys_io_cancel sys_io_cancel 248 common exit_group sys_exit_group sys_exit_group 249 common epoll_create sys_epoll_create sys_epoll_create -250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl -251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait -252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address +250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address sys_set_tid_address 253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 254 common timer_create sys_timer_create compat_sys_timer_create 255 common timer_settime sys_timer_settime compat_sys_timer_settime @@ -273,52 +273,52 @@ 264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages +267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages 268 common mbind sys_mbind compat_sys_mbind 269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy 270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open -272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink +272 common mq_unlink sys_mq_unlink sys_mq_unlink 273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend 274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive 275 common mq_notify sys_mq_notify compat_sys_mq_notify 276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr 277 common kexec_load sys_kexec_load compat_sys_kexec_load -278 common add_key sys_add_key compat_sys_add_key -279 common request_key sys_request_key compat_sys_request_key +278 common add_key sys_add_key sys_add_key +279 common request_key sys_request_key sys_request_key 280 common keyctl sys_keyctl compat_sys_keyctl 281 common waitid sys_waitid compat_sys_waitid 282 common ioprio_set sys_ioprio_set sys_ioprio_set 283 common ioprio_get sys_ioprio_get sys_ioprio_get 284 common inotify_init sys_inotify_init sys_inotify_init -285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch +285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch 287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages 288 common openat sys_openat compat_sys_openat -289 common mkdirat sys_mkdirat compat_sys_mkdirat -290 common mknodat sys_mknodat compat_sys_mknodat -291 common fchownat sys_fchownat compat_sys_fchownat +289 common mkdirat sys_mkdirat sys_mkdirat +290 common mknodat sys_mknodat sys_mknodat +291 common fchownat sys_fchownat sys_fchownat 292 common futimesat sys_futimesat compat_sys_futimesat 293 32 fstatat64 - compat_sys_s390_fstatat64 293 64 newfstatat sys_newfstatat - -294 common unlinkat sys_unlinkat compat_sys_unlinkat -295 common renameat sys_renameat compat_sys_renameat -296 common linkat sys_linkat compat_sys_linkat -297 common symlinkat sys_symlinkat compat_sys_symlinkat -298 common readlinkat sys_readlinkat compat_sys_readlinkat -299 common fchmodat sys_fchmodat compat_sys_fchmodat -300 common faccessat sys_faccessat compat_sys_faccessat +294 common unlinkat sys_unlinkat sys_unlinkat +295 common renameat sys_renameat sys_renameat +296 common linkat sys_linkat sys_linkat +297 common symlinkat sys_symlinkat sys_symlinkat +298 common readlinkat sys_readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat sys_fchmodat +300 common faccessat sys_faccessat sys_faccessat 301 common pselect6 sys_pselect6 compat_sys_pselect6 302 common ppoll sys_ppoll compat_sys_ppoll -303 common unshare sys_unshare compat_sys_unshare +303 common unshare sys_unshare sys_unshare 304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list 305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list -306 common splice sys_splice compat_sys_splice +306 common splice sys_splice sys_splice 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range -308 common tee sys_tee compat_sys_tee +308 common tee sys_tee sys_tee 309 common vmsplice sys_vmsplice compat_sys_vmsplice 310 common move_pages sys_move_pages compat_sys_move_pages -311 common getcpu sys_getcpu compat_sys_getcpu +311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 313 common utimes sys_utimes compat_sys_utimes 314 common fallocate sys_fallocate compat_sys_s390_fallocate @@ -332,17 +332,17 @@ 322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 323 common eventfd2 sys_eventfd2 sys_eventfd2 324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 -325 common pipe2 sys_pipe2 compat_sys_pipe2 +325 common pipe2 sys_pipe2 sys_pipe2 326 common dup3 sys_dup3 sys_dup3 327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 328 common preadv sys_preadv compat_sys_preadv 329 common pwritev sys_pwritev compat_sys_pwritev 330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open +331 common perf_event_open sys_perf_event_open sys_perf_event_open 332 common fanotify_init sys_fanotify_init sys_fanotify_init 333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark -334 common prlimit64 sys_prlimit64 compat_sys_prlimit64 -335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at +334 common prlimit64 sys_prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at 336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at 337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime 338 common syncfs sys_syncfs sys_syncfs @@ -350,44 +350,44 @@ 340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr -343 common kcmp sys_kcmp compat_sys_kcmp -344 common finit_module sys_finit_module compat_sys_finit_module -345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr -346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr -347 common renameat2 sys_renameat2 compat_sys_renameat2 -348 common seccomp sys_seccomp compat_sys_seccomp -349 common getrandom sys_getrandom compat_sys_getrandom -350 common memfd_create sys_memfd_create compat_sys_memfd_create -351 common bpf sys_bpf compat_sys_bpf -352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write -353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read +343 common kcmp sys_kcmp sys_kcmp +344 common finit_module sys_finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 sys_renameat2 +348 common seccomp sys_seccomp sys_seccomp +349 common getrandom sys_getrandom sys_getrandom +350 common memfd_create sys_memfd_create sys_memfd_create +351 common bpf sys_bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read 354 common execveat sys_execveat compat_sys_execveat 355 common userfaultfd sys_userfaultfd sys_userfaultfd 356 common membarrier sys_membarrier sys_membarrier 357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg 358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg 359 common socket sys_socket sys_socket -360 common socketpair sys_socketpair compat_sys_socketpair -361 common bind sys_bind compat_sys_bind -362 common connect sys_connect compat_sys_connect +360 common socketpair sys_socketpair sys_socketpair +361 common bind sys_bind sys_bind +362 common connect sys_connect sys_connect 363 common listen sys_listen sys_listen -364 common accept4 sys_accept4 compat_sys_accept4 +364 common accept4 sys_accept4 sys_accept4 365 common getsockopt sys_getsockopt compat_sys_getsockopt 366 common setsockopt sys_setsockopt compat_sys_setsockopt -367 common getsockname sys_getsockname compat_sys_getsockname -368 common getpeername sys_getpeername compat_sys_getpeername -369 common sendto sys_sendto compat_sys_sendto +367 common getsockname sys_getsockname sys_getsockname +368 common getpeername sys_getpeername sys_getpeername +369 common sendto sys_sendto sys_sendto 370 common sendmsg sys_sendmsg compat_sys_sendmsg 371 common recvfrom sys_recvfrom compat_sys_recvfrom 372 common recvmsg sys_recvmsg compat_sys_recvmsg 373 common shutdown sys_shutdown sys_shutdown -374 common mlock2 sys_mlock2 compat_sys_mlock2 -375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range +374 common mlock2 sys_mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range sys_copy_file_range 376 common preadv2 sys_preadv2 compat_sys_preadv2 377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 -378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage -379 common statx sys_statx compat_sys_statx -380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi -381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load +378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx sys_statx +380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load 382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents -383 common rseq sys_rseq compat_sys_rseq +383 common rseq sys_rseq sys_rseq From 32b77252f47ec00c3c9dc4705f0197dd0f5f87d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Dec 2018 16:07:50 +0100 Subject: [PATCH 06/54] s390: remove the ptep_modify_prot_{start,commit} exports These two functions are only used by core MM code, so no need to export them. Signed-off-by: Christoph Hellwig Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f2cc7da473e4..689b66f29fc6 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -318,7 +318,6 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, } return old; } -EXPORT_SYMBOL(ptep_modify_prot_start); void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) @@ -337,7 +336,6 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } preempt_enable(); } -EXPORT_SYMBOL(ptep_modify_prot_commit); static inline void pmdp_idte_local(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) From 7e0d92f002460d30bea01fa7157be2f13af370a5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 13 Dec 2018 15:53:48 +0100 Subject: [PATCH 07/54] s390/kasan: improve string/memory functions checks Avoid using arch specific implementations of string/memory functions with KASAN since gcc cannot instrument asm code memory accesses and many bugs could be missed. Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/string.c | 1 + arch/s390/include/asm/string.h | 28 ++++++++++++++++++++++------ arch/s390/lib/string.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c index 25aca07898ba..b11e8108773a 100644 --- a/arch/s390/boot/string.c +++ b/arch/s390/boot/string.c @@ -2,6 +2,7 @@ #include #include #include +#undef CONFIG_KASAN #include "../lib/string.c" int strncmp(const char *cs, const char *ct, size_t count) diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h index 116cc15a4b8a..70d87db54e62 100644 --- a/arch/s390/include/asm/string.h +++ b/arch/s390/include/asm/string.h @@ -12,15 +12,21 @@ #include #endif -#define __HAVE_ARCH_MEMCHR /* inline & arch function */ -#define __HAVE_ARCH_MEMCMP /* arch function */ #define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */ #define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */ -#define __HAVE_ARCH_MEMSCAN /* inline & arch function */ #define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */ #define __HAVE_ARCH_MEMSET16 /* arch function */ #define __HAVE_ARCH_MEMSET32 /* arch function */ #define __HAVE_ARCH_MEMSET64 /* arch function */ + +void *memcpy(void *dest, const void *src, size_t n); +void *memset(void *s, int c, size_t n); +void *memmove(void *dest, const void *src, size_t n); + +#ifndef CONFIG_KASAN +#define __HAVE_ARCH_MEMCHR /* inline & arch function */ +#define __HAVE_ARCH_MEMCMP /* arch function */ +#define __HAVE_ARCH_MEMSCAN /* inline & arch function */ #define __HAVE_ARCH_STRCAT /* inline & arch function */ #define __HAVE_ARCH_STRCMP /* arch function */ #define __HAVE_ARCH_STRCPY /* inline & arch function */ @@ -35,9 +41,6 @@ /* Prototypes for non-inlined arch strings functions. */ int memcmp(const void *s1, const void *s2, size_t n); -void *memcpy(void *dest, const void *src, size_t n); -void *memset(void *s, int c, size_t n); -void *memmove(void *dest, const void *src, size_t n); int strcmp(const char *s1, const char *s2); size_t strlcat(char *dest, const char *src, size_t n); size_t strlcpy(char *dest, const char *src, size_t size); @@ -45,6 +48,7 @@ char *strncat(char *dest, const char *src, size_t n); char *strncpy(char *dest, const char *src, size_t n); char *strrchr(const char *s, int c); char *strstr(const char *s1, const char *s2); +#endif /* !CONFIG_KASAN */ #undef __HAVE_ARCH_STRCHR #undef __HAVE_ARCH_STRNCHR @@ -95,6 +99,7 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t count) #if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY)) +#ifdef __HAVE_ARCH_MEMCHR static inline void *memchr(const void * s, int c, size_t n) { register int r0 asm("0") = (char) c; @@ -109,7 +114,9 @@ static inline void *memchr(const void * s, int c, size_t n) : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); return (void *) ret; } +#endif +#ifdef __HAVE_ARCH_MEMSCAN static inline void *memscan(void *s, int c, size_t n) { register int r0 asm("0") = (char) c; @@ -121,7 +128,9 @@ static inline void *memscan(void *s, int c, size_t n) : "+a" (ret), "+&a" (s) : "d" (r0) : "cc", "memory"); return (void *) ret; } +#endif +#ifdef __HAVE_ARCH_STRCAT static inline char *strcat(char *dst, const char *src) { register int r0 asm("0") = 0; @@ -137,7 +146,9 @@ static inline char *strcat(char *dst, const char *src) : "d" (r0), "0" (0) : "cc", "memory" ); return ret; } +#endif +#ifdef __HAVE_ARCH_STRCPY static inline char *strcpy(char *dst, const char *src) { register int r0 asm("0") = 0; @@ -150,7 +161,9 @@ static inline char *strcpy(char *dst, const char *src) : "cc", "memory"); return ret; } +#endif +#ifdef __HAVE_ARCH_STRLEN static inline size_t strlen(const char *s) { register unsigned long r0 asm("0") = 0; @@ -162,7 +175,9 @@ static inline size_t strlen(const char *s) : "+d" (r0), "+a" (tmp) : : "cc", "memory"); return r0 - (unsigned long) s; } +#endif +#ifdef __HAVE_ARCH_STRNLEN static inline size_t strnlen(const char * s, size_t n) { register int r0 asm("0") = 0; @@ -175,6 +190,7 @@ static inline size_t strnlen(const char * s, size_t n) : "+a" (end), "+a" (tmp) : "d" (r0) : "cc", "memory"); return end - s; } +#endif #else /* IN_ARCH_STRING_C */ void *memchr(const void * s, int c, size_t n); void *memscan(void *s, int c, size_t n); diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index a10e11f7a5f7..0e30e6e43b0c 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -43,11 +43,13 @@ static inline char *__strnend(const char *s, size_t n) * * returns the length of @s */ +#ifdef __HAVE_ARCH_STRLEN size_t strlen(const char *s) { return __strend(s) - s; } EXPORT_SYMBOL(strlen); +#endif /** * strnlen - Find the length of a length-limited string @@ -56,11 +58,13 @@ EXPORT_SYMBOL(strlen); * * returns the minimum of the length of @s and @n */ +#ifdef __HAVE_ARCH_STRNLEN size_t strnlen(const char *s, size_t n) { return __strnend(s, n) - s; } EXPORT_SYMBOL(strnlen); +#endif /** * strcpy - Copy a %NUL terminated string @@ -69,6 +73,7 @@ EXPORT_SYMBOL(strnlen); * * returns a pointer to @dest */ +#ifdef __HAVE_ARCH_STRCPY char *strcpy(char *dest, const char *src) { register int r0 asm("0") = 0; @@ -81,6 +86,7 @@ char *strcpy(char *dest, const char *src) return ret; } EXPORT_SYMBOL(strcpy); +#endif /** * strlcpy - Copy a %NUL terminated string into a sized buffer @@ -93,6 +99,7 @@ EXPORT_SYMBOL(strcpy); * of course, the buffer size is zero). It does not pad * out the result like strncpy() does. */ +#ifdef __HAVE_ARCH_STRLCPY size_t strlcpy(char *dest, const char *src, size_t size) { size_t ret = __strend(src) - src; @@ -105,6 +112,7 @@ size_t strlcpy(char *dest, const char *src, size_t size) return ret; } EXPORT_SYMBOL(strlcpy); +#endif /** * strncpy - Copy a length-limited, %NUL-terminated string @@ -115,6 +123,7 @@ EXPORT_SYMBOL(strlcpy); * The result is not %NUL-terminated if the source exceeds * @n bytes. */ +#ifdef __HAVE_ARCH_STRNCPY char *strncpy(char *dest, const char *src, size_t n) { size_t len = __strnend(src, n) - src; @@ -123,6 +132,7 @@ char *strncpy(char *dest, const char *src, size_t n) return dest; } EXPORT_SYMBOL(strncpy); +#endif /** * strcat - Append one %NUL-terminated string to another @@ -131,6 +141,7 @@ EXPORT_SYMBOL(strncpy); * * returns a pointer to @dest */ +#ifdef __HAVE_ARCH_STRCAT char *strcat(char *dest, const char *src) { register int r0 asm("0") = 0; @@ -146,6 +157,7 @@ char *strcat(char *dest, const char *src) return ret; } EXPORT_SYMBOL(strcat); +#endif /** * strlcat - Append a length-limited, %NUL-terminated string to another @@ -153,6 +165,7 @@ EXPORT_SYMBOL(strcat); * @src: The string to append to it * @n: The size of the destination buffer. */ +#ifdef __HAVE_ARCH_STRLCAT size_t strlcat(char *dest, const char *src, size_t n) { size_t dsize = __strend(dest) - dest; @@ -170,6 +183,7 @@ size_t strlcat(char *dest, const char *src, size_t n) return res; } EXPORT_SYMBOL(strlcat); +#endif /** * strncat - Append a length-limited, %NUL-terminated string to another @@ -182,6 +196,7 @@ EXPORT_SYMBOL(strlcat); * Note that in contrast to strncpy, strncat ensures the result is * terminated. */ +#ifdef __HAVE_ARCH_STRNCAT char *strncat(char *dest, const char *src, size_t n) { size_t len = __strnend(src, n) - src; @@ -192,6 +207,7 @@ char *strncat(char *dest, const char *src, size_t n) return dest; } EXPORT_SYMBOL(strncat); +#endif /** * strcmp - Compare two strings @@ -202,6 +218,7 @@ EXPORT_SYMBOL(strncat); * < 0 if @s1 is less than @s2 * > 0 if @s1 is greater than @s2 */ +#ifdef __HAVE_ARCH_STRCMP int strcmp(const char *s1, const char *s2) { register int r0 asm("0") = 0; @@ -219,12 +236,14 @@ int strcmp(const char *s1, const char *s2) return ret; } EXPORT_SYMBOL(strcmp); +#endif /** * strrchr - Find the last occurrence of a character in a string * @s: The string to be searched * @c: The character to search for */ +#ifdef __HAVE_ARCH_STRRCHR char *strrchr(const char *s, int c) { size_t len = __strend(s) - s; @@ -237,6 +256,7 @@ char *strrchr(const char *s, int c) return NULL; } EXPORT_SYMBOL(strrchr); +#endif static inline int clcle(const char *s1, unsigned long l1, const char *s2, unsigned long l2) @@ -261,6 +281,7 @@ static inline int clcle(const char *s1, unsigned long l1, * @s1: The string to be searched * @s2: The string to search for */ +#ifdef __HAVE_ARCH_STRSTR char *strstr(const char *s1, const char *s2) { int l1, l2; @@ -280,6 +301,7 @@ char *strstr(const char *s1, const char *s2) return NULL; } EXPORT_SYMBOL(strstr); +#endif /** * memchr - Find a character in an area of memory. @@ -290,6 +312,7 @@ EXPORT_SYMBOL(strstr); * returns the address of the first occurrence of @c, or %NULL * if @c is not found */ +#ifdef __HAVE_ARCH_MEMCHR void *memchr(const void *s, int c, size_t n) { register int r0 asm("0") = (char) c; @@ -304,6 +327,7 @@ void *memchr(const void *s, int c, size_t n) return (void *) ret; } EXPORT_SYMBOL(memchr); +#endif /** * memcmp - Compare two areas of memory @@ -311,6 +335,7 @@ EXPORT_SYMBOL(memchr); * @s2: Another area of memory * @count: The size of the area. */ +#ifdef __HAVE_ARCH_MEMCMP int memcmp(const void *s1, const void *s2, size_t n) { int ret; @@ -321,6 +346,7 @@ int memcmp(const void *s1, const void *s2, size_t n) return ret; } EXPORT_SYMBOL(memcmp); +#endif /** * memscan - Find a character in an area of memory. @@ -331,6 +357,7 @@ EXPORT_SYMBOL(memcmp); * returns the address of the first occurrence of @c, or 1 byte past * the area if @c is not found */ +#ifdef __HAVE_ARCH_MEMSCAN void *memscan(void *s, int c, size_t n) { register int r0 asm("0") = (char) c; @@ -342,3 +369,4 @@ void *memscan(void *s, int c, size_t n) return (void *) ret; } EXPORT_SYMBOL(memscan); +#endif From c6ac875446f96564232931fafd149e43e728dd6f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 9 Jan 2019 11:35:41 -0600 Subject: [PATCH 08/54] s390/hypfs: Use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; instance = kzalloc(sizeof(struct foo) + sizeof(void *) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_diag0c.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index cebf05150cc1..9c398bf0ddc6 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -54,8 +54,7 @@ static void *diag0c_store(unsigned int *count) if (!cpu_vec) goto fail_put_online_cpus; /* Note: Diag 0c needs 8 byte alignment and real storage */ - diag0c_data = kzalloc(sizeof(struct hypfs_diag0c_hdr) + - cpu_count * sizeof(struct hypfs_diag0c_entry), + diag0c_data = kzalloc(struct_size(diag0c_data, entry, cpu_count), GFP_KERNEL | GFP_DMA); if (!diag0c_data) goto fail_kfree_cpu_vec; From fb8bfca06cbc8280976ea751501767ae1f37dff5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Jan 2019 10:30:44 +0100 Subject: [PATCH 09/54] s390: fix system call tracing When converting to autogenerated compat syscall wrappers all system call entry points got a different symbol name: they all got a __s390x_ prefix. This caused breakage with system call tracing, since an appropriate arch_syscall_match_sym_name() was not provided. Add this function, and while at it also add code to avoid compat system call tracing. s390 has different system call tables for native 64 bit system calls and compat system calls. This isn't really supported in the common code. However there are hardly any compat binaries left, therefore just ignore compat system calls, like x86 and arm64 also do for the same reason. Fixes: aa0d6e70d3b3 ("s390: autogenerate compat syscall wrappers") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 8ea270fdc7fb..5a3c95b11952 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -81,5 +81,30 @@ static inline void ftrace_generate_call_insn(struct ftrace_insn *insn, #endif } +/* + * Even though the system call numbers are identical for s390/s390x a + * different system call table is used for compat tasks. This may lead + * to e.g. incorrect or missing trace event sysfs files. + * Therefore simply do not trace compat system calls at all. + * See kernel/trace/trace_syscalls.c. + */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + return is_compat_task(); +} + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Skip __s390_ and __s390x_ prefix - due to compat wrappers + * and aliasing some symbols of 64 bit system call functions + * may get the __s390_ prefix instead of the __s390x_ prefix. + */ + return !strcmp(sym + 7, name) || !strcmp(sym + 8, name); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_FTRACE_H */ From 4ad78b8651aacf26b3ab6d1e784952eb70469c43 Mon Sep 17 00:00:00 2001 From: Collin Walling Date: Thu, 6 Dec 2018 17:30:04 -0500 Subject: [PATCH 10/54] s390/setup: set control program code via diag 318 The s390x diagnose 318 instruction sets the control program name code (CPNC) and control program version code (CPVC) to provide useful information regarding the OS during debugging. The CPNC is explicitly set to 4 to indicate a Linux/KVM environment. The CPVC is a 7-byte value containing: - 3-byte Linux version code, currently set to 0 - 3-byte unique value, currently set to 0 - 1-byte trailing null Signed-off-by: Collin Walling Acked-by: Janosch Frank Acked-by: Heiko Carstens Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Message-Id: <1544135405-22385-2-git-send-email-walling@linux.ibm.com> [set version code to 0 until the structure is fully defined] Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/diag.h | 12 ++++++++++++ arch/s390/include/asm/sclp.h | 1 + arch/s390/kernel/diag.c | 1 + arch/s390/kernel/setup.c | 21 +++++++++++++++++++++ drivers/s390/char/sclp.h | 4 +++- drivers/s390/char/sclp_early.c | 2 ++ 6 files changed, 40 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index cdbaad50c7c7..19562be22b7e 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -32,6 +32,7 @@ enum diag_stat_enum { DIAG_STAT_X2FC, DIAG_STAT_X304, DIAG_STAT_X308, + DIAG_STAT_X318, DIAG_STAT_X500, NR_DIAG_STAT }; @@ -293,6 +294,17 @@ struct diag26c_mac_resp { u8 res[2]; } __aligned(8); +#define CPNC_LINUX 0x4 +union diag318_info { + unsigned long val; + struct { + unsigned int cpnc : 8; + unsigned int cpvc_linux : 24; + unsigned char cpvc_distro[3]; + unsigned char zero; + }; +}; + int diag204(unsigned long subcode, unsigned long size, void *addr); int diag224(void *ptr); int diag26c(void *req, void *resp, enum diag26c_sc subcode); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 0cd4bda85eb1..ef4c9dec06a4 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -78,6 +78,7 @@ struct sclp_info { unsigned char has_skey : 1; unsigned char has_kss : 1; unsigned char has_gisaf : 1; + unsigned char has_diag318 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 53a5316cc4b7..7edaa733a77f 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -45,6 +45,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 72dd23ef771b..5295e536515b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -989,6 +990,25 @@ static void __init setup_task_size(void) arch_task_struct_size = task_size; } +/* + * Issue diagnose 318 to set the control program name and + * version codes. + */ +static void __init setup_control_program_code(void) +{ + union diag318_info diag318_info = { + .cpnc = CPNC_LINUX, + .cpvc_linux = 0, + .cpvc_distro = {0}, + }; + + if (!sclp.has_diag318) + return; + + diag_stat_inc(DIAG_STAT_X318); + asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val)); +} + /* * Setup function called from init/main.c just after the banner * was printed. @@ -1031,6 +1051,7 @@ void __init setup_arch(char **cmdline_p) os_info_init(); setup_ipl(); setup_task_size(); + setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ reserve_memory_end(); diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index b3fcc24b1182..367e9d384d85 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -195,7 +195,9 @@ struct read_info_sccb { u16 hcpua; /* 120-121 */ u8 _pad_122[124 - 122]; /* 122-123 */ u32 hmfai; /* 124-127 */ - u8 _pad_128[4096 - 128]; /* 128-4095 */ + u8 _pad_128[134 - 128]; /* 128-133 */ + u8 byte_134; /* 134 */ + u8 _pad_135[4096 - 135]; /* 135-4095 */ } __packed __aligned(PAGE_SIZE); struct read_storage_sccb { diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index e792cee3b51c..8332788681c4 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -44,6 +44,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; if (sccb->fac91 & 0x40) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST; + if (sccb->cpuoff > 134) + sclp.has_diag318 = !!(sccb->byte_134 & 0x80); sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; From f36108c46288ddd625688d852935490688069f71 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:00 +0100 Subject: [PATCH 11/54] s390/hypfs: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs.h | 6 +++--- arch/s390/hypfs/hypfs_dbfs.c | 8 ++------ arch/s390/hypfs/hypfs_diag.c | 9 ++++----- arch/s390/hypfs/hypfs_diag0c.c | 3 ++- arch/s390/hypfs/hypfs_sprp.c | 6 +++--- arch/s390/hypfs/hypfs_vm.c | 3 ++- arch/s390/hypfs/inode.c | 11 +++-------- 7 files changed, 19 insertions(+), 27 deletions(-) diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 52348e0a812e..05f3f9aee5fc 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -43,7 +43,7 @@ int hypfs_diag0c_init(void); void hypfs_diag0c_exit(void); /* Set Partition-Resource Parameter */ -int hypfs_sprp_init(void); +void hypfs_sprp_init(void); void hypfs_sprp_exit(void); /* debugfs interface */ @@ -69,9 +69,9 @@ struct hypfs_dbfs_file { struct dentry *dentry; }; -extern int hypfs_dbfs_init(void); +extern void hypfs_dbfs_init(void); extern void hypfs_dbfs_exit(void); -extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df); +extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df); extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df); #endif /* _HYPFS_H_ */ diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index b9bdf5c1918e..f4c7dbfaf8ee 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -78,14 +78,11 @@ static const struct file_operations dbfs_ops = { .unlocked_ioctl = dbfs_ioctl, }; -int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) +void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) { df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, &dbfs_ops); - if (IS_ERR(df->dentry)) - return PTR_ERR(df->dentry); mutex_init(&df->lock); - return 0; } void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) @@ -93,10 +90,9 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) debugfs_remove(df->dentry); } -int hypfs_dbfs_init(void) +void hypfs_dbfs_init(void) { dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); - return PTR_ERR_OR_ZERO(dbfs_dir); } void hypfs_dbfs_exit(void) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 3452e18bb1ca..f0bc4dc3e9bf 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -440,11 +440,10 @@ __init int hypfs_diag_init(void) pr_err("The hardware system does not support hypfs\n"); return -ENODATA; } - if (diag204_info_type == DIAG204_INFO_EXT) { - rc = hypfs_dbfs_create_file(&dbfs_file_d204); - if (rc) - return rc; - } + + if (diag204_info_type == DIAG204_INFO_EXT) + hypfs_dbfs_create_file(&dbfs_file_d204); + if (MACHINE_IS_LPAR) { rc = diag224_get_name_table(); if (rc) { diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 9c398bf0ddc6..72e3140fafb5 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -124,7 +124,8 @@ int __init hypfs_diag0c_init(void) { if (!MACHINE_IS_VM) return 0; - return hypfs_dbfs_create_file(&dbfs_file_0c); + hypfs_dbfs_create_file(&dbfs_file_0c); + return 0; } /* diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index 601b70786dc8..7d9fb496d155 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -137,11 +137,11 @@ static struct hypfs_dbfs_file hypfs_sprp_file = { .unlocked_ioctl = hypfs_sprp_ioctl, }; -int hypfs_sprp_init(void) +void hypfs_sprp_init(void) { if (!sclp.has_sprp) - return 0; - return hypfs_dbfs_create_file(&hypfs_sprp_file); + return; + hypfs_dbfs_create_file(&hypfs_sprp_file); } void hypfs_sprp_exit(void) diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index c4b7b681e055..42f2375c203e 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -279,7 +279,8 @@ int hypfs_vm_init(void) guest_query = local_guest; else return -EACCES; - return hypfs_dbfs_create_file(&dbfs_file_2fc); + hypfs_dbfs_create_file(&dbfs_file_2fc); + return 0; } void hypfs_vm_exit(void) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index c681329fdeec..ccad1398abd4 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -456,9 +456,8 @@ static int __init hypfs_init(void) { int rc; - rc = hypfs_dbfs_init(); - if (rc) - return rc; + hypfs_dbfs_init(); + if (hypfs_diag_init()) { rc = -ENODATA; goto fail_dbfs_exit; @@ -467,10 +466,7 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_diag_exit; } - if (hypfs_sprp_init()) { - rc = -ENODATA; - goto fail_hypfs_vm_exit; - } + hypfs_sprp_init(); if (hypfs_diag0c_init()) { rc = -ENODATA; goto fail_hypfs_sprp_exit; @@ -489,7 +485,6 @@ fail_hypfs_diag0c_exit: hypfs_diag0c_exit(); fail_hypfs_sprp_exit: hypfs_sprp_exit(); -fail_hypfs_vm_exit: hypfs_vm_exit(); fail_hypfs_diag_exit: hypfs_diag_exit(); From d7f2f7c7fc5133c82865bc39346805b1b8459c31 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:01 +0100 Subject: [PATCH 12/54] s390: pci: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Sebastian Ott Cc: Gerald Schaefer Cc: linux-s390@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_debug.c | 13 +++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 10fe982f2b4b..4e0efebc56a9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -148,7 +148,6 @@ struct zpci_dev { enum pci_bus_speed max_bus_speed; struct dentry *debugfs_dev; - struct dentry *debugfs_perf; struct s390_domain *s390_domain; /* s390 IOMMU domain data */ }; diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 04388a254ffb..6b48ca7760a7 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -172,21 +172,14 @@ static const struct file_operations debugfs_pci_perf_fops = { void zpci_debug_init_device(struct zpci_dev *zdev, const char *name) { zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root); - if (IS_ERR(zdev->debugfs_dev)) - zdev->debugfs_dev = NULL; - zdev->debugfs_perf = debugfs_create_file("statistics", - S_IFREG | S_IRUGO | S_IWUSR, - zdev->debugfs_dev, zdev, - &debugfs_pci_perf_fops); - if (IS_ERR(zdev->debugfs_perf)) - zdev->debugfs_perf = NULL; + debugfs_create_file("statistics", S_IFREG | S_IRUGO | S_IWUSR, + zdev->debugfs_dev, zdev, &debugfs_pci_perf_fops); } void zpci_debug_exit_device(struct zpci_dev *zdev) { - debugfs_remove(zdev->debugfs_perf); - debugfs_remove(zdev->debugfs_dev); + debugfs_remove_recursive(zdev->debugfs_dev); } int __init zpci_debug_init(void) From 7dd541a3fb34b34f0f25154dd0d24d0d9b1ca0f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 22 Jan 2019 16:21:02 +0100 Subject: [PATCH 13/54] s390: no need to check return value of debugfs_create functions When calling debugfs functions, there is no need to ever check the return value. The function can work or not, but the code logic should never do something different based on this. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Kees Cook Cc: Christian Borntraeger Cc: Hendrik Brueckner Cc: linux-s390@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/debug.c | 6 ------ arch/s390/kernel/kdebugfs.c | 2 -- arch/s390/kernel/sysinfo.c | 2 -- 3 files changed, 10 deletions(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index d374f9b218b4..0ebf08c3b35e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1056,12 +1056,6 @@ int debug_register_view(debug_info_t *id, struct debug_view *view) mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, id, &debug_file_ops); - if (!pde) { - pr_err("Registering view %s/%s failed due to out of " - "memory\n", id->name, view->name); - rc = -1; - goto out; - } spin_lock_irqsave(&id->lock, flags); for (i = 0; i < DEBUG_MAX_VIEWS; i++) { if (!id->views[i]) diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c index 2c46bd6c6fd2..33130c7daf55 100644 --- a/arch/s390/kernel/kdebugfs.c +++ b/arch/s390/kernel/kdebugfs.c @@ -9,8 +9,6 @@ EXPORT_SYMBOL(arch_debugfs_dir); static int __init arch_kdebugfs_init(void) { arch_debugfs_dir = debugfs_create_dir("s390", NULL); - if (IS_ERR(arch_debugfs_dir)) - arch_debugfs_dir = NULL; return 0; } postcore_initcall(arch_kdebugfs_init); diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 12f80d1f0415..2ac3c9b56a13 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -545,8 +545,6 @@ static __init int stsi_init_debugfs(void) int lvl, i; stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir); - if (IS_ERR_OR_NULL(stsi_root)) - return 0; lvl = stsi(NULL, 0, 0, 0); if (lvl > 0) stsi_0_0_0 = lvl; From 405d566f98ae19ccf624b31b6eccd6039d9cbc39 Mon Sep 17 00:00:00 2001 From: Farhan Ali Date: Mon, 21 Jan 2019 09:54:08 -0500 Subject: [PATCH 14/54] vfio-ccw: Don't assume there are more ccws after a TIC When trying to calculate the length of a ccw chain, we assume there are ccws after a TIC. This can lead to overcounting and copying garbage data from guest memory. Signed-off-by: Farhan Ali Message-Id: Reviewed-by: Halil Pasic Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 70a006ba4d05..ba08fe137c2e 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -392,7 +392,7 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) return -EOPNOTSUPP; } - if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw))) + if (!ccw_is_chain(ccw)) break; ccw++; From ea0ca93d6a306151c43c493cc9fe0b0d221f651a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 29 Jan 2019 15:15:11 +0100 Subject: [PATCH 15/54] s390/setup: remove obsolete #ifdef The #ifdef CONFIG_DMA_API_DEBUG check in reserve_kernel() is no longer needed, since commit ea535e418c01 ("dma-debug: switch check from _text to _stext") changed the logic in lib/dma-debug.c, so remove it. Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 5295e536515b..0ff75b3750ea 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -795,18 +795,9 @@ static void __init reserve_kernel(void) { unsigned long start_pfn = PFN_UP(__pa(_end)); -#ifdef CONFIG_DMA_API_DEBUG - /* - * DMA_API_DEBUG code stumbles over addresses from the - * range [PARMAREA_END, _stext]. Mark the memory as reserved - * so it is not used for CONFIG_DMA_API_DEBUG=y. - */ - memblock_reserve(0, PFN_PHYS(start_pfn)); -#else memblock_reserve(0, PARMAREA_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); -#endif } static void __init setup_memory(void) From d4192437d75ae4f401aac5d54229756829015e1d Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 29 Jan 2019 15:17:11 +0100 Subject: [PATCH 16/54] s390: remove dead code Remove some dead code from head64.S, which was left over since commit da292bbe1f62 ("[S390] eliminate ipl_device from lowcore") removed ipl_device from lowcore. Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head64.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 57bba24b1c27..56491e636eab 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -27,8 +27,6 @@ ENTRY(startup_continue) mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers - lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area - # move IPL device to lowcore larl %r0,boot_vdso_data stg %r0,__LC_VDSO_PER_CPU # From a0308c1315e72b6fdce7b419c4546dad568b3a83 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 29 Jan 2019 10:48:09 +0100 Subject: [PATCH 17/54] s390/mmap: take stack_guard_gap into account for mmap_base The s390 version of the mmap_base function is ignorant of stack_guard_gap which can lead to a placement of the stack vs. the mmap base that does not leave enough space for the stack rlimit. Add the stack_guard_gap to the calculation and while we are at it the check for gap+pad overflows as well. Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/mmap.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 0a7627cdb34e..687f2a4d3459 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -29,14 +29,6 @@ static unsigned long stack_maxrandom_size(void) return STACK_RND_MASK << PAGE_SHIFT; } -/* - * Top of mmap area (just below the process stack). - * - * Leave at least a ~32 MB hole. - */ -#define MIN_GAP (32*1024*1024) -#define MAX_GAP (STACK_TOP/6*5) - static inline int mmap_is_legacy(struct rlimit *rlim_stack) { if (current->personality & ADDR_COMPAT_LAYOUT) @@ -60,13 +52,26 @@ static inline unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { unsigned long gap = rlim_stack->rlim_cur; + unsigned long pad = stack_maxrandom_size() + stack_guard_gap; + unsigned long gap_min, gap_max; - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - gap &= PAGE_MASK; - return STACK_TOP - stack_maxrandom_size() - rnd - gap; + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; + + /* + * Top of mmap area (just below the process stack). + * Leave at least a ~32 MB hole. + */ + gap_min = 32 * 1024 * 1024UL; + gap_max = (STACK_TOP / 6) * 5; + + if (gap < gap_min) + gap = gap_min; + else if (gap > gap_max) + gap = gap_max; + + return PAGE_ALIGN(STACK_TOP - gap - rnd); } unsigned long From e8e25a7718cf64701ddf7f7b2e31c79815b613f1 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 31 Jul 2018 09:59:09 -0400 Subject: [PATCH 18/54] s390/pci: improve bar check Improve the bar check in pci_iomap_range to cover functions for which we recognize more bars than what we can access due to AR restrictions. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index a966d7bfac57..353161c2e309 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -285,7 +285,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev, struct zpci_dev *zdev = to_zpci(pdev); int idx; - if (!pci_resource_len(pdev, bar)) + if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT) return NULL; idx = zdev->bars[bar].map_idx; From cfbb4a7ab6bd5df7aca826b92ebb3565efd3d801 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 12 Sep 2018 12:47:37 +0200 Subject: [PATCH 19/54] s390/pci: map IOV resources Map IOV resources such that pci common code recognizes the IOV capability of PFs. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 353161c2e309..6b054ce8c9b6 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -482,6 +482,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) } } +#ifdef CONFIG_PCI_IOV +static struct resource iov_res = { + .name = "PCI IOV res", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; +#endif + static void zpci_map_resources(struct pci_dev *pdev) { resource_size_t len; @@ -495,6 +504,17 @@ static void zpci_map_resources(struct pci_dev *pdev) (resource_size_t __force) pci_iomap(pdev, i, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; } + +#ifdef CONFIG_PCI_IOV + i = PCI_IOV_RESOURCES; + + for (; i < PCI_SRIOV_NUM_BARS + PCI_IOV_RESOURCES; i++) { + len = pci_resource_len(pdev, i); + if (!len) + continue; + pdev->resource[i].parent = &iov_res; + } +#endif } static void zpci_unmap_resources(struct pci_dev *pdev) From bdf117674ef14664f7d943b141e3b366dee2fea0 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 28 Jan 2019 16:11:13 +0100 Subject: [PATCH 20/54] s390/qdio: make SBAL address array type-safe There is no need to use void pointers, all drivers are in agreement about the underlying data structure of the SBAL arrays. Signed-off-by: Julian Wiedmann Reviewed-by: Benjamin Block Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/qdio.h | 4 ++-- drivers/s390/cio/qdio_setup.c | 6 +++--- drivers/s390/net/qeth_core_main.c | 4 ++-- drivers/s390/scsi/zfcp_qdio.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index d46edde7e458..db5ef22c46e4 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -361,8 +361,8 @@ struct qdio_initialize { unsigned long); int scan_threshold; unsigned long int_parm; - void **input_sbal_addr_array; - void **output_sbal_addr_array; + struct qdio_buffer **input_sbal_addr_array; + struct qdio_buffer **output_sbal_addr_array; struct qdio_outbuf_state *output_sbal_state_array; }; diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index e324d890a4f6..a59887fad13e 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -181,7 +181,7 @@ static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr, } static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, - void **sbals_array, int i) + struct qdio_buffer **sbals_array, int i) { struct qdio_q *prev; int j; @@ -212,8 +212,8 @@ static void setup_queues(struct qdio_irq *irq_ptr, struct qdio_initialize *qdio_init) { struct qdio_q *q; - void **input_sbal_array = qdio_init->input_sbal_addr_array; - void **output_sbal_array = qdio_init->output_sbal_addr_array; + struct qdio_buffer **input_sbal_array = qdio_init->input_sbal_addr_array; + struct qdio_buffer **output_sbal_array = qdio_init->output_sbal_addr_array; struct qdio_outbuf_state *output_sbal_state_array = qdio_init->output_sbal_state_array; int i; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index e63e03143ca7..2b68022b027b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4979,8 +4979,8 @@ static int qeth_qdio_establish(struct qeth_card *card) init_data.output_handler = qeth_qdio_output_handler; init_data.queue_start_poll_array = queue_start_poll; init_data.int_parm = (unsigned long) card; - init_data.input_sbal_addr_array = (void **) in_sbal_ptrs; - init_data.output_sbal_addr_array = (void **) out_sbal_ptrs; + init_data.input_sbal_addr_array = in_sbal_ptrs; + init_data.output_sbal_addr_array = out_sbal_ptrs; init_data.output_sbal_state_array = card->qdio.out_bufstates; init_data.scan_threshold = (card->info.type == QETH_CARD_TYPE_IQD) ? 1 : 32; diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 10c4e8e3fd59..661436a92f8e 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -294,8 +294,8 @@ static void zfcp_qdio_setup_init_data(struct qdio_initialize *id, id->input_handler = zfcp_qdio_int_resp; id->output_handler = zfcp_qdio_int_req; id->int_parm = (unsigned long) qdio; - id->input_sbal_addr_array = (void **) (qdio->res_q); - id->output_sbal_addr_array = (void **) (qdio->req_q); + id->input_sbal_addr_array = qdio->res_q; + id->output_sbal_addr_array = qdio->req_q; id->scan_threshold = QDIO_MAX_BUFFERS_PER_Q - ZFCP_QDIO_MAX_SBALS_PER_REQ * 2; } From f8b11e089aeb63303daa0c05e4575df7075b1122 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 1 Feb 2019 02:02:52 +0000 Subject: [PATCH 21/54] s390: remove unused including Remove including that don't need it. Signed-off-by: YueHaibing Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0ff75b3750ea..493876daf51a 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include From 142c52d7bce45d335f48d53fdbf428bb15cf3924 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 6 Feb 2019 18:06:03 +0100 Subject: [PATCH 22/54] s390: add alignment hints to vector load and store The z14 introduced alignment hints to increase the performance of vector loads and stores. The kernel uses an implicit alignmenet of 8 bytes for the vector registers, set the alignment hint to 3. Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/vx-insn.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h index 266a72320e05..0c05a673811c 100644 --- a/arch/s390/include/asm/vx-insn.h +++ b/arch/s390/include/asm/vx-insn.h @@ -363,23 +363,23 @@ .endm /* VECTOR LOAD MULTIPLE */ -.macro VLM vfrom, vto, disp, base +.macro VLM vfrom, vto, disp, base, hint=3 VX_NUM v1, \vfrom VX_NUM v3, \vto GR_NUM b2, \base /* Base register */ .word 0xE700 | ((v1&15) << 4) | (v3&15) .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x36, v1, v3 + MRXBOPC \hint, 0x36, v1, v3 .endm /* VECTOR STORE MULTIPLE */ -.macro VSTM vfrom, vto, disp, base +.macro VSTM vfrom, vto, disp, base, hint=3 VX_NUM v1, \vfrom VX_NUM v3, \vto GR_NUM b2, \base /* Base register */ .word 0xE700 | ((v1&15) << 4) | (v3&15) .word (b2 << 12) | (\disp) - MRXBOPC 0, 0x3E, v1, v3 + MRXBOPC \hint, 0x3E, v1, v3 .endm /* VECTOR PERMUTE */ From b1af7528d27e450a1a4492678454b15177398a26 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 5 Feb 2019 17:22:36 +0100 Subject: [PATCH 23/54] s390/zcrypt: use new state UNBOUND during queue driver rebind When an alternate driver (vfio-ap) has bound an ap queue and this binding is revised the ap queue device is in an intermittent state not bound to any driver. The internal state variable covered this with the state AP_STATE_BORKED which is also used to reflect broken devices. When now an ap bus scan runs such a device is destroyed and on the next scan reconstructed. So a stress test with high frequency switching the queue driver between the default and the vfio-ap driver hit this gap and the queue was removed until the next ap bus scan. This fix now introduces another state for the in-between condition for a queue momentary not bound to a driver and so the ap bus scan function skips this device instead of removing it. Also some very slight but maybe helpful debug feature messages come with this patch - in particular a message showing that a broken card/queue device will get removed. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 19 +++++++++++++++++-- drivers/s390/crypto/ap_bus.h | 3 ++- drivers/s390/crypto/ap_queue.c | 7 ++++++- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 48ea0004a56d..f07632da815e 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1334,6 +1334,16 @@ static int __match_queue_device_with_qid(struct device *dev, void *data) return is_queue_dev(dev) && to_ap_queue(dev)->qid == (int)(long) data; } +/* + * Helper function to be used with bus_find_dev + * matches any queue device with given queue id + */ +static int __match_queue_device_with_queue_id(struct device *dev, void *data) +{ + return is_queue_dev(dev) + && AP_QID_QUEUE(to_ap_queue(dev)->qid) == (int)(long) data; +} + /* * Helper function for ap_scan_bus(). * Does the scan bus job for the given adapter id. @@ -1434,8 +1444,13 @@ static void _ap_scan_bus_adapter(int id) borked = aq->state == AP_STATE_BORKED; spin_unlock_bh(&aq->lock); } - if (borked) /* Remove broken device */ + if (borked) { + /* Remove broken device */ + AP_DBF(DBF_DEBUG, + "removing broken queue=%02x.%04x\n", + id, dom); device_unregister(dev); + } put_device(dev); continue; } @@ -1505,7 +1520,7 @@ static void ap_scan_bus(struct work_struct *unused) struct device *dev = bus_find_device(&ap_bus_type, NULL, (void *)(long) ap_domain_index, - __match_queue_device_with_qid); + __match_queue_device_with_queue_id); if (dev) put_device(dev); else diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index bfc66e4a9de1..d0059eae5d94 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -91,7 +91,8 @@ enum ap_state { AP_STATE_WORKING, AP_STATE_QUEUE_FULL, AP_STATE_SUSPEND_WAIT, - AP_STATE_BORKED, + AP_STATE_UNBOUND, /* momentary not bound to a driver */ + AP_STATE_BORKED, /* broken */ NR_AP_STATES }; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 576ac08777c5..ba261210c6da 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -420,6 +420,10 @@ static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = { [AP_EVENT_POLL] = ap_sm_suspend_read, [AP_EVENT_TIMEOUT] = ap_sm_nop, }, + [AP_STATE_UNBOUND] = { + [AP_EVENT_POLL] = ap_sm_nop, + [AP_EVENT_TIMEOUT] = ap_sm_nop, + }, [AP_STATE_BORKED] = { [AP_EVENT_POLL] = ap_sm_nop, [AP_EVENT_TIMEOUT] = ap_sm_nop, @@ -725,6 +729,7 @@ static void __ap_flush_queue(struct ap_queue *aq) ap_msg->rc = -EAGAIN; ap_msg->receive(aq, ap_msg, NULL); } + aq->queue_count = 0; } void ap_flush_queue(struct ap_queue *aq) @@ -743,7 +748,7 @@ void ap_queue_remove(struct ap_queue *aq) /* reset with zero, also clears irq registration */ spin_lock_bh(&aq->lock); ap_zapq(aq->qid); - aq->state = AP_STATE_BORKED; + aq->state = AP_STATE_UNBOUND; spin_unlock_bh(&aq->lock); } EXPORT_SYMBOL(ap_queue_remove); From 146448524bddbf6dfc62de31957e428de001cbda Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 6 Feb 2019 12:35:58 +0100 Subject: [PATCH 24/54] s390/jump_label: Use "jdd" constraint on gcc9 [heiko.carstens@de.ibm.com]: ----- Laura Abbott reported that the kernel doesn't build anymore with gcc 9, due to the "X" constraint. Ilya provided the gcc 9 patch "S/390: Introduce jdd constraint" which introduces the new "jdd" constraint which fixes this. ----- The support for section anchors on S/390 introduced in gcc9 has changed the behavior of "X" constraint, which can now produce register references. Since existing constraints, in particular, "i", do not fit the intended use case on S/390, the new machine-specific "jdd" constraint was introduced. This patch makes jump labels use "jdd" constraint when building with gcc9. Reported-by: Laura Abbott Signed-off-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/jump_label.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index e2d3e6c43395..e548ec1ec12c 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -10,6 +10,12 @@ #define JUMP_LABEL_NOP_SIZE 6 #define JUMP_LABEL_NOP_OFFSET 2 +#if __GNUC__ < 9 +#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "X" +#else +#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "jdd" +#endif + /* * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. @@ -20,9 +26,9 @@ static inline bool arch_static_branch(struct static_key *key, bool branch) ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" - ".quad %0-.\n" + ".quad %0+%1-.\n" ".popsection\n" - : : "X" (&((char *)key)[branch]) : : label); + : : JUMP_LABEL_STATIC_KEY_CONSTRAINT (key), "i" (branch) : : label); return false; label: return true; @@ -34,9 +40,9 @@ static inline bool arch_static_branch_jump(struct static_key *key, bool branch) ".pushsection __jump_table,\"aw\"\n" ".balign 8\n" ".long 0b-.,%l[label]-.\n" - ".quad %0-.\n" + ".quad %0+%1-.\n" ".popsection\n" - : : "X" (&((char *)key)[branch]) : : label); + : : JUMP_LABEL_STATIC_KEY_CONSTRAINT (key), "i" (branch) : : label); return false; label: return true; From 8727638426b0aea59d7f904ad8ddf483f9234f88 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 14 Feb 2019 15:40:56 +0100 Subject: [PATCH 25/54] s390/setup: fix early warning messages The setup_lowcore() function creates a new prefix page for the boot CPU. The PSW mask for the system_call, external interrupt, i/o interrupt and the program check handler have the DAT bit set in this new prefix page. At the time setup_lowcore is called the system still runs without virtual address translation, the paging_init() function creates the kernel page table and loads the CR13 with the kernel ASCE. Any code between setup_lowcore() and the end of paging_init() that has a BUG or WARN statement will create a program check that can not be handled correctly as there is no kernel page table yet. To allow early WARN statements initially setup the lowcore with DAT off and set the DAT bit only after paging_init() has completed. Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 493876daf51a..e89659f46401 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -369,7 +369,7 @@ void __init arch_call_rest_init(void) : : [_frame] "a" (frame)); } -static void __init setup_lowcore(void) +static void __init setup_lowcore_dat_off(void) { struct lowcore *lc; @@ -380,19 +380,16 @@ static void __init setup_lowcore(void) lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc)); lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; lc->svc_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS | - PSW_MASK_DAT | PSW_MASK_MCHECK; + lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; lc->nodat_stack = ((unsigned long) &init_thread_union) @@ -452,6 +449,17 @@ static void __init setup_lowcore(void) lowcore_ptr[0] = lc; } +static void __init setup_lowcore_dat_on(void) +{ + struct lowcore *lc; + + lc = lowcore_ptr[0]; + lc->external_new_psw.mask |= PSW_MASK_DAT; + lc->svc_new_psw.mask |= PSW_MASK_DAT; + lc->program_new_psw.mask |= PSW_MASK_DAT; + lc->io_new_psw.mask |= PSW_MASK_DAT; +} + static struct resource code_resource = { .name = "Kernel code", .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, @@ -1081,7 +1089,7 @@ void __init setup_arch(char **cmdline_p) #endif setup_resources(); - setup_lowcore(); + setup_lowcore_dat_off(); smp_fill_possible_mask(); cpu_detect_mhz_feature(); cpu_init(); @@ -1094,6 +1102,12 @@ void __init setup_arch(char **cmdline_p) */ paging_init(); + /* + * After paging_init created the kernel page table, the new PSWs + * in lowcore can now run with DAT enabled. + */ + setup_lowcore_dat_on(); + /* Setup default console */ conmode_default(); set_preferred_console(); From 86a86804e4f18fc3880541b3d5a07f4df0fe29cb Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 18 Feb 2019 18:10:08 +0100 Subject: [PATCH 26/54] s390/setup: fix boot crash for machine without EDAT-1 The fix to make WARN work in the early boot code created a problem on older machines without EDAT-1. The setup_lowcore_dat_on function uses the pointer from lowcore_ptr[0] to set the DAT bit in the new PSWs. That does not work if the kernel page table is set up with 4K pages as the prefix address maps to absolute zero. To make this work the PSWs need to be changed with via address 0 in form of the S390_lowcore definition. Reported-by: Guenter Roeck Tested-by: Cornelia Huck Fixes: 94f85ed3e2f8 ("s390/setup: fix early warning messages") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e89659f46401..4e91dc172024 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -451,13 +451,12 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { - struct lowcore *lc; - - lc = lowcore_ptr[0]; - lc->external_new_psw.mask |= PSW_MASK_DAT; - lc->svc_new_psw.mask |= PSW_MASK_DAT; - lc->program_new_psw.mask |= PSW_MASK_DAT; - lc->io_new_psw.mask |= PSW_MASK_DAT; + __ctl_clear_bit(0, 28); + S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; + S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; + S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; + S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; + __ctl_set_bit(0, 28); } static struct resource code_resource = { From 0ff06c44efeede4acd068847d3bf8cf894b6c664 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 14 Feb 2019 14:46:23 +0100 Subject: [PATCH 27/54] s390/ism: ignore some errors during deregistration Prior to dma unmap/free operations the ism driver tries to ensure that the memory is no longer accessed by the HW. When errors during deregistration of memory regions from the HW occur the ism driver will not unmap/free this memory. When we receive notification from the hypervisor that a PCI function has been detached we can no longer access the device and would never unmap/free these memory regions which led to complaints by the DMA debug API. Treat this kind of errors during the deregistration of memory regions from the HW as success since it is already ensured that the memory is no longer accessed by HW. Reported-by: Karsten Graul Reported-by: Hans Wippel Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/net/ism_drv.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index ed8e58f09054..3e132592c1fe 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -141,10 +141,13 @@ static int register_ieq(struct ism_dev *ism) static int unregister_sba(struct ism_dev *ism) { + int ret; + if (!ism->sba) return 0; - if (ism_cmd_simple(ism, ISM_UNREG_SBA)) + ret = ism_cmd_simple(ism, ISM_UNREG_SBA); + if (ret && ret != ISM_ERROR) return -EIO; dma_free_coherent(&ism->pdev->dev, PAGE_SIZE, @@ -158,10 +161,13 @@ static int unregister_sba(struct ism_dev *ism) static int unregister_ieq(struct ism_dev *ism) { + int ret; + if (!ism->ieq) return 0; - if (ism_cmd_simple(ism, ISM_UNREG_IEQ)) + ret = ism_cmd_simple(ism, ISM_UNREG_IEQ); + if (ret && ret != ISM_ERROR) return -EIO; dma_free_coherent(&ism->pdev->dev, PAGE_SIZE, @@ -287,7 +293,7 @@ static int ism_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb) cmd.request.dmb_tok = dmb->dmb_tok; ret = ism_cmd(ism, &cmd); - if (ret) + if (ret && ret != ISM_ERROR) goto out; ism_free_dmb(ism, dmb); From ebb7c695d3bc7a4986b92edc8d9ef43491be183e Mon Sep 17 00:00:00 2001 From: Ingo Franzki Date: Wed, 20 Feb 2019 14:01:39 +0100 Subject: [PATCH 28/54] pkey: Indicate old mkvp only if old and current mkvp are different When the CCA master key is set twice with the same master key, then the old and the current master key are the same and thus the verification patterns are the same, too. The check to report if a secure key is currently wrapped by the old master key erroneously reports old mkvp in this case. Reviewed-by: Harald Freudenberger Signed-off-by: Ingo Franzki Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 2f92bbed4bf6..3e85d665c572 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -1079,7 +1079,7 @@ int pkey_verifykey(const struct pkey_seckey *seckey, rc = mkvp_cache_fetch(cardnr, domain, mkvp); if (rc) goto out; - if (t->mkvp == mkvp[1]) { + if (t->mkvp == mkvp[1] && t->mkvp != mkvp[0]) { DEBUG_DBG("%s secure key has old mkvp\n", __func__); if (pattributes) *pattributes |= PKEY_VERIFY_ATTR_OLD_MKVP; From ca57114609d1b9a141a67c2222a7b8edc156291a Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 21 Feb 2019 15:34:21 +0100 Subject: [PATCH 29/54] s390/extmem: remove code for 31 bit addressing mode All supported releases of z/VM allow 64 bit subcodes and addressing mode for diag 0x64. This patch removes a lot of code for handling 31 bit addressing mode and old subcodes. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 129 ++++-------------------------------------- 1 file changed, 12 insertions(+), 117 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index eba2def3414d..86c8a3c8d910 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -28,12 +28,7 @@ #include #include -#define DCSS_LOADSHR 0x00 -#define DCSS_LOADNSR 0x04 #define DCSS_PURGESEG 0x08 -#define DCSS_FINDSEG 0x0c -#define DCSS_LOADNOLY 0x10 -#define DCSS_SEGEXT 0x18 #define DCSS_LOADSHRX 0x20 #define DCSS_LOADNSRX 0x24 #define DCSS_FINDSEGX 0x2c @@ -53,20 +48,6 @@ struct qout64 { struct qrange range[6]; }; -struct qrange_old { - unsigned int start; /* last byte type */ - unsigned int end; /* last byte reserved */ -}; - -/* output area format for the Diag x'64' old subcode x'18' */ -struct qout64_old { - int segstart; - int segend; - int segcnt; - int segrcnt; - struct qrange_old range[6]; -}; - struct qin64 { char qopcode; char rsrv1[3]; @@ -95,52 +76,10 @@ static DEFINE_MUTEX(dcss_lock); static LIST_HEAD(dcss_list); static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", "EW/EN-MIXED" }; -static int loadshr_scode, loadnsr_scode; -static int segext_scode, purgeseg_scode; -static int scode_set; - -/* set correct Diag x'64' subcodes. */ -static int -dcss_set_subcodes(void) -{ - char *name = kmalloc(8, GFP_KERNEL | GFP_DMA); - unsigned long rx, ry; - int rc; - - if (name == NULL) - return -ENOMEM; - - rx = (unsigned long) name; - ry = DCSS_FINDSEGX; - - strcpy(name, "dummy"); - diag_stat_inc(DIAG_STAT_X064); - asm volatile( - " diag %0,%1,0x64\n" - "0: ipm %2\n" - " srl %2,28\n" - " j 2f\n" - "1: la %2,3\n" - "2:\n" - EX_TABLE(0b, 1b) - : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc", "memory"); - - kfree(name); - /* Diag x'64' new subcodes are supported, set to new subcodes */ - if (rc != 3) { - loadshr_scode = DCSS_LOADSHRX; - loadnsr_scode = DCSS_LOADNSRX; - purgeseg_scode = DCSS_PURGESEG; - segext_scode = DCSS_SEGEXTX; - return 0; - } - /* Diag x'64' new subcodes are not supported, set to old subcodes */ - loadshr_scode = DCSS_LOADNOLY; - loadnsr_scode = DCSS_LOADNSR; - purgeseg_scode = DCSS_PURGESEG; - segext_scode = DCSS_SEGEXT; - return 0; -} +static int loadshr_scode = DCSS_LOADSHRX; +static int loadnsr_scode = DCSS_LOADNSRX; +static int purgeseg_scode = DCSS_PURGESEG; +static int segext_scode = DCSS_SEGEXTX; /* * Create the 8 bytes, ebcdic VM segment name from @@ -196,32 +135,15 @@ dcss_diag(int *func, void *parameter, unsigned long rx, ry; int rc; - if (scode_set == 0) { - rc = dcss_set_subcodes(); - if (rc < 0) - return rc; - scode_set = 1; - } rx = (unsigned long) parameter; ry = (unsigned long) *func; - /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */ diag_stat_inc(DIAG_STAT_X064); - if (*func > DCSS_SEGEXT) - asm volatile( - " diag %0,%1,0x64\n" - " ipm %2\n" - " srl %2,28\n" - : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); - /* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */ - else - asm volatile( - " sam31\n" - " diag %0,%1,0x64\n" - " sam64\n" - " ipm %2\n" - " srl %2,28\n" - : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); + asm volatile( + " diag %0,%1,0x64\n" + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc"); *ret1 = rx; *ret2 = ry; return rc; @@ -271,31 +193,6 @@ query_segment_type (struct dcss_segment *seg) goto out_free; } - /* Only old format of output area of Diagnose x'64' is supported, - copy data for the new format. */ - if (segext_scode == DCSS_SEGEXT) { - struct qout64_old *qout_old; - qout_old = kzalloc(sizeof(*qout_old), GFP_KERNEL | GFP_DMA); - if (qout_old == NULL) { - rc = -ENOMEM; - goto out_free; - } - memcpy(qout_old, qout, sizeof(struct qout64_old)); - qout->segstart = (unsigned long) qout_old->segstart; - qout->segend = (unsigned long) qout_old->segend; - qout->segcnt = qout_old->segcnt; - qout->segrcnt = qout_old->segrcnt; - - if (qout->segcnt > 6) - qout->segrcnt = 6; - for (i = 0; i < qout->segrcnt; i++) { - qout->range[i].start = - (unsigned long) qout_old->range[i].start; - qout->range[i].end = - (unsigned long) qout_old->range[i].end; - } - kfree(qout_old); - } if (qout->segcnt > 6) { rc = -EOPNOTSUPP; goto out_free; @@ -410,11 +307,9 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long if (rc < 0) goto out_free; - if (loadshr_scode == DCSS_LOADSHRX) { - if (segment_overlaps_others(seg)) { - rc = -EBUSY; - goto out_free; - } + if (segment_overlaps_others(seg)) { + rc = -EBUSY; + goto out_free; } rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1); From f1777625c5aa5c94274d5190084a54ca1d35d9bb Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 21 Feb 2019 15:36:31 +0100 Subject: [PATCH 30/54] s390/extmem: print DCSS range with %px The DCSS range is currently printed with %p, which results in hashed values instead of the actual addresses. Use %px instead, the DCSS ranges do not reveal any kernel symbol addresses. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/extmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 86c8a3c8d910..0b5622714c12 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -367,11 +367,11 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *addr = seg->start_addr; *end = seg->end; if (do_nonshared) - pr_info("DCSS %s of range %p to %p and type %s loaded as " + pr_info("DCSS %s of range %px to %px and type %s loaded as " "exclusive-writable\n", name, (void*) seg->start_addr, (void*) seg->end, segtype_string[seg->vm_segtype]); else { - pr_info("DCSS %s of range %p to %p and type %s loaded in " + pr_info("DCSS %s of range %px to %px and type %s loaded in " "shared access mode\n", name, (void*) seg->start_addr, (void*) seg->end, segtype_string[seg->vm_segtype]); } From 30e145f811428e1a96738fbc1f98ccc91f412d93 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 6 Aug 2018 15:56:43 +0200 Subject: [PATCH 31/54] s390/cpum_cf: move counter set controls to a new header file Move counter set specific controls and functions to the asm/cpu_mcf.h header file containg all counter facility support definitions. Also adapt few variable names and header file includes. No functional changes. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 52 ++++++++++++++++++++++++++ arch/s390/include/asm/perf_event.h | 1 - arch/s390/kernel/perf_cpum_cf.c | 45 ++-------------------- arch/s390/kernel/perf_cpum_cf_events.c | 1 + 4 files changed, 56 insertions(+), 43 deletions(-) create mode 100644 arch/s390/include/asm/cpu_mcf.h diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h new file mode 100644 index 000000000000..b6e73fbf87d0 --- /dev/null +++ b/arch/s390/include/asm/cpu_mcf.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Counter facility support definitions for the Linux perf + * + * Copyright IBM Corp. 2019 + * Author(s): Hendrik Brueckner + */ +#ifndef _ASM_S390_CPU_MCF_H +#define _ASM_S390_CPU_MCF_H + +#include +#include + +enum cpumf_ctr_set { + CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ + CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ + CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ + CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ + CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 +static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, +}; + +static inline void ctr_set_enable(u64 *state, int ctr_set) +{ + *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; +} +static inline void ctr_set_disable(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); +} +static inline void ctr_set_start(u64 *state, int ctr_set) +{ + *state |= cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; +} +static inline void ctr_set_stop(u64 *state, int ctr_set) +{ + *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +#endif /* _ASM_S390_CPU_MCF_H */ diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index b9c0e361748b..70240961df74 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -12,7 +12,6 @@ #include #include -#include /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index d5523adeddbf..66d945d5589b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -10,52 +10,13 @@ #include #include -#include #include #include #include #include #include #include -#include - -enum cpumf_ctr_set { - CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ - CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ - CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ - CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ - CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ - - /* Maximum number of counter sets */ - CPUMF_CTR_SET_MAX, -}; - -#define CPUMF_LCCTL_ENABLE_SHIFT 16 -#define CPUMF_LCCTL_ACTCTL_SHIFT 0 -static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = { - [CPUMF_CTR_SET_BASIC] = 0x02, - [CPUMF_CTR_SET_USER] = 0x04, - [CPUMF_CTR_SET_CRYPTO] = 0x08, - [CPUMF_CTR_SET_EXT] = 0x01, - [CPUMF_CTR_SET_MT_DIAG] = 0x20, -}; - -static void ctr_set_enable(u64 *state, int ctr_set) -{ - *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT; -} -static void ctr_set_disable(u64 *state, int ctr_set) -{ - *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT); -} -static void ctr_set_start(u64 *state, int ctr_set) -{ - *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT; -} -static void ctr_set_stop(u64 *state, int ctr_set) -{ - *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); -} +#include /* Local CPUMF event structure */ struct cpu_hw_events { @@ -135,7 +96,7 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) * Thus, the counters can only be used if SMT is on and the * counter set is enabled and active. */ - mtdiag_ctl = cpumf_state_ctl[CPUMF_CTR_SET_MT_DIAG]; + mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; if (!((cpuhw->info.auth_ctl & mtdiag_ctl) && (cpuhw->info.enable_ctl & mtdiag_ctl) && (cpuhw->info.act_ctl & mtdiag_ctl))) @@ -160,7 +121,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc) * return with -ENOENT in order to fall back to other * PMUs that might suffice the event request. */ - ctrs_state = cpumf_state_ctl[hwc->config_base]; + ctrs_state = cpumf_ctr_ctl[hwc->config_base]; if (!(ctrs_state & cpuhw->info.auth_ctl)) err = -ENOENT; diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index d63fb3c56b8a..b45238c89728 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -6,6 +6,7 @@ #include #include +#include /* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ From 3d33345aa3d9ab2ee9b5a5bf2a8842c43603d537 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 6 Aug 2018 17:43:07 +0200 Subject: [PATCH 32/54] s390/cpum_cf: prepare for in-kernel counter measurements Prepare the counter facility support to be used by other in-kernel users. The first step introduces the __kernel_cpumcf_begin() and __kernel_cpumcf_end() functions to reserve the counter facility for doing measurements and to release after the measurements are done. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 3 +++ arch/s390/kernel/perf_cpum_cf.c | 32 ++++++++++++++++++++++++++------ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index b6e73fbf87d0..63fa74115cba 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -49,4 +49,7 @@ static inline void ctr_set_stop(u64 *state, int ctr_set) *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); } +int __kernel_cpumcf_begin(void); +void __kernel_cpumcf_end(void); + #endif /* _ASM_S390_CPU_MCF_H */ diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 66d945d5589b..b2e46b8b881a 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -236,25 +236,45 @@ static void setup_pmc_cpu(void *flags) lcctl(0); } -/* Initialize the CPU-measurement facility */ -static int reserve_pmc_hardware(void) +/* Reserve/release functions for sharing perf hardware */ +static DEFINE_SPINLOCK(cpumcf_owner_lock); +static void *cpumcf_owner; + +/* Initialize the CPU-measurement counter facility */ +int __kernel_cpumcf_begin(void) { int flags = PMC_INIT; + int err = 0; + + spin_lock(&cpumcf_owner_lock); + if (cpumcf_owner) + err = -EBUSY; + else + cpumcf_owner = __builtin_return_address(0); + spin_unlock(&cpumcf_owner_lock); + if (err) + return err; on_each_cpu(setup_pmc_cpu, &flags, 1); irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); return 0; } +EXPORT_SYMBOL(__kernel_cpumcf_begin); -/* Release the CPU-measurement facility */ -static void release_pmc_hardware(void) +/* Release the CPU-measurement counter facility */ +void __kernel_cpumcf_end(void) { int flags = PMC_RELEASE; on_each_cpu(setup_pmc_cpu, &flags, 1); irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + spin_lock(&cpumcf_owner_lock); + cpumcf_owner = NULL; + spin_unlock(&cpumcf_owner_lock); } +EXPORT_SYMBOL(__kernel_cpumcf_end); /* Release the PMU if event is the last perf event */ static void hw_perf_event_destroy(struct perf_event *event) @@ -262,7 +282,7 @@ static void hw_perf_event_destroy(struct perf_event *event) if (!atomic_add_unless(&num_events, -1, 1)) { mutex_lock(&pmc_reserve_mutex); if (atomic_dec_return(&num_events) == 0) - release_pmc_hardware(); + __kernel_cpumcf_end(); mutex_unlock(&pmc_reserve_mutex); } } @@ -363,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event) /* Initialize for using the CPU-measurement counter facility */ if (!atomic_inc_not_zero(&num_events)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + if (atomic_read(&num_events) == 0 && __kernel_cpumcf_begin()) err = -EBUSY; else atomic_inc(&num_events); From f1c0b83173e59c34daec48de92c0c2934e7417b2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Aug 2018 10:04:23 +0200 Subject: [PATCH 33/54] s390/cpum_cf: rename per-CPU counter facility structure and variables Rename the struct cpu_hw_events to cpu_cf_events and also the respective per-CPU variable to make its name more clear. No functional changes. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 40 ++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index b2e46b8b881a..bae59455b86d 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -19,14 +19,14 @@ #include /* Local CPUMF event structure */ -struct cpu_hw_events { +struct cpu_cf_events { struct cpumf_ctr_info info; atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned int flags; unsigned int txn_flags; }; -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { +static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { .ctr_set = { [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), @@ -59,11 +59,11 @@ static enum cpumf_ctr_set get_counter_set(u64 event) static int validate_ctr_version(const struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuhw; + struct cpu_cf_events *cpuhw; int err = 0; u16 mtdiag_ctl; - cpuhw = &get_cpu_var(cpu_hw_events); + cpuhw = &get_cpu_var(cpu_cf_events); /* check required version for counter sets */ switch (hwc->config_base) { @@ -104,17 +104,17 @@ static int validate_ctr_version(const struct hw_perf_event *hwc) break; } - put_cpu_var(cpu_hw_events); + put_cpu_var(cpu_cf_events); return err; } static int validate_ctr_auth(const struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuhw; + struct cpu_cf_events *cpuhw; u64 ctrs_state; int err = 0; - cpuhw = &get_cpu_var(cpu_hw_events); + cpuhw = &get_cpu_var(cpu_cf_events); /* Check authorization for cpu counter sets. * If the particular CPU counter set is not authorized, @@ -125,7 +125,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc) if (!(ctrs_state & cpuhw->info.auth_ctl)) err = -ENOENT; - put_cpu_var(cpu_hw_events); + put_cpu_var(cpu_cf_events); return err; } @@ -136,7 +136,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc) */ static void cpumf_pmu_enable(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); int err; if (cpuhw->flags & PMU_F_ENABLED) @@ -159,7 +159,7 @@ static void cpumf_pmu_enable(struct pmu *pmu) */ static void cpumf_pmu_disable(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); int err; u64 inactive; @@ -187,13 +187,13 @@ static DEFINE_MUTEX(pmc_reserve_mutex); static void cpumf_measurement_alert(struct ext_code ext_code, unsigned int alert, unsigned long unused) { - struct cpu_hw_events *cpuhw; + struct cpu_cf_events *cpuhw; if (!(alert & CPU_MF_INT_CF_MASK)) return; inc_irq_stat(IRQEXT_CMC); - cpuhw = this_cpu_ptr(&cpu_hw_events); + cpuhw = this_cpu_ptr(&cpu_cf_events); /* Measurement alerts are shared and might happen when the PMU * is not reserved. Ignore these alerts in this case. */ @@ -218,7 +218,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, #define PMC_RELEASE 1 static void setup_pmc_cpu(void *flags) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); switch (*((int *) flags)) { case PMC_INIT: @@ -469,7 +469,7 @@ static void cpumf_pmu_read(struct perf_event *event) static void cpumf_pmu_start(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); struct hw_perf_event *hwc = &event->hw; if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) @@ -500,7 +500,7 @@ static void cpumf_pmu_start(struct perf_event *event, int flags) static void cpumf_pmu_stop(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); struct hw_perf_event *hwc = &event->hw; if (!(hwc->state & PERF_HES_STOPPED)) { @@ -521,7 +521,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags) static int cpumf_pmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); /* Check authorization for the counter set to which this * counter belongs. @@ -545,7 +545,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) static void cpumf_pmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); cpumf_pmu_stop(event, PERF_EF_UPDATE); @@ -573,7 +573,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) */ static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ @@ -593,7 +593,7 @@ static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) static void cpumf_pmu_cancel_txn(struct pmu *pmu) { unsigned int txn_flags; - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ @@ -614,7 +614,7 @@ static void cpumf_pmu_cancel_txn(struct pmu *pmu) */ static int cpumf_pmu_commit_txn(struct pmu *pmu) { - struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); u64 state; WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ From f944bcdf5b8431c68be8bdd13259d27412e45c14 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Aug 2018 10:12:22 +0200 Subject: [PATCH 34/54] s390/cpu_mf: move struct cpu_cf_events and per-CPU variable to header file Make the struct cpu_cf_events and the respective per-CPU variable available to in-kernel users. Access to this per-CPU variable shall be done between the calls to __kernel_cpumcf_begin() and __kernel_cpumcf_end(). Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 9 +++++++++ arch/s390/kernel/perf_cpum_cf.c | 11 ++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 63fa74115cba..82e0b80f2c81 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -49,6 +49,15 @@ static inline void ctr_set_stop(u64 *state, int ctr_set) *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); } +struct cpu_cf_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state, tx_state; + unsigned int flags; + unsigned int txn_flags; +}; +DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events); + int __kernel_cpumcf_begin(void); void __kernel_cpumcf_end(void); diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index bae59455b86d..758cbb1b84f5 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -18,15 +18,8 @@ #include #include -/* Local CPUMF event structure */ -struct cpu_cf_events { - struct cpumf_ctr_info info; - atomic_t ctr_set[CPUMF_CTR_SET_MAX]; - u64 state, tx_state; - unsigned int flags; - unsigned int txn_flags; -}; -static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { +/* Per-CPU event structure for the counter facility */ +DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { .ctr_set = { [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), From 26b8317f51a20c1e4f61fbd2cc68975faad10b02 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Aug 2018 10:30:37 +0200 Subject: [PATCH 35/54] s390/cpum_cf: introduce kernel_cpumcf_alert() to obtain measurement alerts During a __kernel_cpumcf_begin()/end() session, save measurement alerts for the counter facility in the per-CPU cpu_cf_events variable. Users can obtain and, optionally, clear the alerts by calling kernel_cpumcf_alert() to specifically handle alerts. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 2 ++ arch/s390/kernel/perf_cpum_cf.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 82e0b80f2c81..2cd4f09ee268 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -52,6 +52,7 @@ static inline void ctr_set_stop(u64 *state, int ctr_set) struct cpu_cf_events { struct cpumf_ctr_info info; atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + atomic64_t alert; u64 state, tx_state; unsigned int flags; unsigned int txn_flags; @@ -59,6 +60,7 @@ struct cpu_cf_events { DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events); int __kernel_cpumcf_begin(void); +unsigned long kernel_cpumcf_alert(int clear); void __kernel_cpumcf_end(void); #endif /* _ASM_S390_CPU_MCF_H */ diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 758cbb1b84f5..c05f69142ce4 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -27,6 +27,7 @@ DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0), }, + .alert = ATOMIC64_INIT(0), .state = 0, .flags = 0, .txn_flags = 0, @@ -205,6 +206,9 @@ static void cpumf_measurement_alert(struct ext_code ext_code, if (alert & CPU_MF_INT_CF_MTDA) pr_warn("CPU[%i] MT counter data was lost\n", smp_processor_id()); + + /* store alert for special handling by in-kernel users */ + atomic64_or(alert, &cpuhw->alert); } #define PMC_INIT 0 @@ -255,6 +259,20 @@ int __kernel_cpumcf_begin(void) } EXPORT_SYMBOL(__kernel_cpumcf_begin); +/* Obtain the CPU-measurement alerts for the counter facility */ +unsigned long kernel_cpumcf_alert(int clear) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + unsigned long alert; + + alert = atomic64_read(&cpuhw->alert); + if (clear) + atomic64_set(&cpuhw->alert, 0); + + return alert; +} +EXPORT_SYMBOL(kernel_cpumcf_alert); + /* Release the CPU-measurement counter facility */ void __kernel_cpumcf_end(void) { From 17bebcc68eeea3e1189f712dcba39809ad0d7a86 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Aug 2018 10:38:43 +0200 Subject: [PATCH 36/54] s390/cpum_cf: Add minimal in-kernel interface for counter measurements Introduce a minimal interface for doing counter measurements of small units of work within the kernel. Use the kernel_cpumcf_begin() function start a measurement session and, later, stop it with kernel_cpumcf_end(). During the measreument session, you can enable and start/stop counter sets by using ctr_set_* functions. To make these changes effective use the lcctl() function. You can then use the ecctr() function to extract counters from the different counter sets. Please note that you have to check whether the counter sets to be enabled are authorized. Note that when a measurement session is active, other users cannot perform counter measurements. In such cases, kernel_cpumcf_begin() indicates this with returning -EBUSY. If the counter facility is not available, kernel_cpumcf_begin() returns -ENODEV. Note that this interface is restricted to the current CPU and, thus, preemption must be turned off. Example: u32 state, err; u64 cycles, insn; err = kernel_cpumcf_begin(); if (err) goto out_busy; state = 0; ctr_set_enable(&state, CPUMF_CTR_SET_BASIC); ctr_set_start(&state, CPUMF_CTR_SET_BASIC); err = lcctl(state); if (err) goto ; /* ... do your work ... */ ctr_set_stop(&state, CPUMF_CTR_SET_BASIC); err = lcctl(state); if (err) goto out; cycles = insn = 0; ecctr(0, &cycles); ecctr(1, &insn); /* ... */ kernel_cpumcf_end(); out_busy: Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 2cd4f09ee268..12a4224560bc 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -63,4 +63,18 @@ int __kernel_cpumcf_begin(void); unsigned long kernel_cpumcf_alert(int clear); void __kernel_cpumcf_end(void); +static inline int kernel_cpumcf_begin(void) +{ + if (!cpum_cf_avail()) + return -ENODEV; + + preempt_disable(); + return __kernel_cpumcf_begin(); +} +static inline void kernel_cpumcf_end(void) +{ + __kernel_cpumcf_end(); + preempt_enable(); +} + #endif /* _ASM_S390_CPU_MCF_H */ From 778fb10ccc18b16c022be898d8497767c20ea7b5 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 29 Aug 2018 17:46:06 +0200 Subject: [PATCH 37/54] s390/cpu_mf: add store cpu counter multiple instruction support Add support for the STORE CPU COUNTER MULTIPLE instruction to extract a range of counters from a counter set. An assembler macro is used to create the instruction opcode because the counter set identifier is part of the instruction and, thus, cannot be easily specified as parameter. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf-insn.h | 22 ++++++++++++++++++++++ arch/s390/include/asm/cpu_mf.h | 17 +++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 arch/s390/include/asm/cpu_mf-insn.h diff --git a/arch/s390/include/asm/cpu_mf-insn.h b/arch/s390/include/asm/cpu_mf-insn.h new file mode 100644 index 000000000000..a68b362e0964 --- /dev/null +++ b/arch/s390/include/asm/cpu_mf-insn.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Support for CPU-MF instructions + * + * Copyright IBM Corp. 2019 + * Author(s): Hendrik Brueckner + */ +#ifndef _ASM_S390_CPU_MF_INSN_H +#define _ASM_S390_CPU_MF_INSN_H + +#ifdef __ASSEMBLY__ + +/* Macro to generate the STCCTM instruction with a customized + * M3 field designating the counter set. + */ +.macro STCCTM r1 m3 db2 + .insn rsy,0xeb0000000017,\r1,\m3 & 0xf,\db2 +.endm + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index bf2cbff926ef..53ba4f2ca296 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -12,6 +12,8 @@ #include #include +asm(".include \"asm/cpu_mf-insn.h\"\n"); + #define CPU_MF_INT_SF_IAE (1 << 31) /* invalid entry address */ #define CPU_MF_INT_SF_ISE (1 << 30) /* incorrect SDBT entry */ #define CPU_MF_INT_SF_PRA (1 << 29) /* program request alert */ @@ -209,6 +211,21 @@ static inline int ecctr(u64 ctr, u64 *val) return cc; } +/* Store CPU counter multiple for a particular counter set */ +static inline int stcctm(u8 set, u64 range, u64 *dest) +{ + int cc; + + asm volatile ( + " STCCTM %2,%3,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "Q" (*dest), "d" (range), "i" (set) + : "cc", "memory"); + return cc; +} + /* Store CPU counter multiple for the MT utilization counter set */ static inline int stcctm5(u64 num, u64 *val) { From 346d034d7f13da9eb135458a2f6cf14c9b77a637 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 29 Aug 2018 18:12:17 +0200 Subject: [PATCH 38/54] s390/cpu_mf: replace stcctm5() with the stcctm() function Remove the stcctm5() function to extract counters from the MT-diagnostic counter set with the stcctm() function. For readability, introduce an enum to map the counter sets names to respective numbers for the stcctm instruction. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mf.h | 25 +++++++++---------------- arch/s390/kernel/vtime.c | 4 ++-- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 53ba4f2ca296..ae3e3221d4b5 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -212,7 +212,15 @@ static inline int ecctr(u64 ctr, u64 *val) } /* Store CPU counter multiple for a particular counter set */ -static inline int stcctm(u8 set, u64 range, u64 *dest) +enum stcctm_ctr_set { + EXTENDED = 0, + BASIC = 1, + PROBLEM_STATE = 2, + CRYPTO_ACTIVITY = 3, + MT_DIAG = 5, + MT_DIAG_CLEARING = 9, /* clears loss-of-MT-ctr-data alert */ +}; +static inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest) { int cc; @@ -226,21 +234,6 @@ static inline int stcctm(u8 set, u64 range, u64 *dest) return cc; } -/* Store CPU counter multiple for the MT utilization counter set */ -static inline int stcctm5(u64 num, u64 *val) -{ - int cc; - - asm volatile ( - " .insn rsy,0xeb0000000017,%2,5,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (cc) - : "Q" (*val), "d" (num) - : "cc", "memory"); - return cc; -} - /* Query sampling information */ static inline int qsi(struct hws_qsi_info_block *info) { diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index f24395a01918..98f850e00008 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -69,7 +69,7 @@ static void update_mt_scaling(void) u64 delta, fac, mult, div; int i; - stcctm5(smp_cpu_mtid + 1, cycles_new); + stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new); cycles_old = this_cpu_ptr(mt_cycles); fac = 1; mult = div = 0; @@ -432,6 +432,6 @@ void vtime_init(void) __this_cpu_write(mt_scaling_jiffies, jiffies); __this_cpu_write(mt_scaling_mult, 1); __this_cpu_write(mt_scaling_div, 1); - stcctm5(smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles)); + stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles)); } } From 869f4f98fafadddb3a871b9513f24a44479d8296 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 25 Oct 2018 16:58:15 +0200 Subject: [PATCH 39/54] s390/cpum_cf: introduce kernel_cpumcf_avail() function A preparation to move out common CPU-MF counter facility support functions, first introduce a function that indicates whether the support is ready to use. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 1 + arch/s390/kernel/perf_cpum_cf.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 12a4224560bc..7121365a85d9 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -59,6 +59,7 @@ struct cpu_cf_events { }; DECLARE_PER_CPU(struct cpu_cf_events, cpu_cf_events); +bool kernel_cpumcf_avail(void); int __kernel_cpumcf_begin(void); unsigned long kernel_cpumcf_alert(int clear); void __kernel_cpumcf_end(void); diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index c05f69142ce4..ab4b8c904d01 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -33,6 +33,9 @@ DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { .txn_flags = 0, }; +/* Indicator whether the CPU-Measurement Counter Facility Support is ready */ +static bool cpum_cf_initalized; + static enum cpumf_ctr_set get_counter_set(u64 event) { int set = CPUMF_CTR_SET_MAX; @@ -233,6 +236,12 @@ static void setup_pmc_cpu(void *flags) lcctl(0); } +bool kernel_cpumcf_avail(void) +{ + return cpum_cf_initalized; +} +EXPORT_SYMBOL(kernel_cpumcf_avail); + /* Reserve/release functions for sharing perf hardware */ static DEFINE_SPINLOCK(cpumcf_owner_lock); static void *cpumcf_owner; @@ -709,8 +718,13 @@ static int __init cpumf_pmu_init(void) cpumf_measurement_alert); return rc; } - return cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, - "perf/s390/cf:online", - s390_pmu_online_cpu, s390_pmu_offline_cpu); + + rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, + "perf/s390/cf:online", + s390_pmu_online_cpu, s390_pmu_offline_cpu); + if (!rc) + cpum_cf_initalized = true; + + return rc; } early_initcall(cpumf_pmu_init); From 7f5ac1a02242b6d100b6f788dffa9e8681983ee1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 25 Oct 2018 17:26:34 +0200 Subject: [PATCH 40/54] s390/cpum_cf: move common functions into a separate file Move common functions of the couter facility support into a separate file. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 3 +- arch/s390/kernel/perf_cpum_cf.c | 184 +--------------------- arch/s390/kernel/perf_cpum_cf_common.c | 201 +++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 182 deletions(-) create mode 100644 arch/s390/kernel/perf_cpum_cf_common.c diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 4e188a7be501..ade87df0eb64 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -77,7 +77,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o +obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_TRACEPOINTS) += trace.o diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index ab4b8c904d01..670ff98acb62 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -14,28 +14,8 @@ #include #include #include -#include -#include #include -/* Per-CPU event structure for the counter facility */ -DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { - .ctr_set = { - [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0), - }, - .alert = ATOMIC64_INIT(0), - .state = 0, - .flags = 0, - .txn_flags = 0, -}; - -/* Indicator whether the CPU-Measurement Counter Facility Support is ready */ -static bool cpum_cf_initalized; - static enum cpumf_ctr_set get_counter_set(u64 event) { int set = CPUMF_CTR_SET_MAX; @@ -180,122 +160,6 @@ static atomic_t num_events = ATOMIC_INIT(0); /* Used to avoid races in calling reserve/release_cpumf_hardware */ static DEFINE_MUTEX(pmc_reserve_mutex); -/* CPU-measurement alerts for the counter facility */ -static void cpumf_measurement_alert(struct ext_code ext_code, - unsigned int alert, unsigned long unused) -{ - struct cpu_cf_events *cpuhw; - - if (!(alert & CPU_MF_INT_CF_MASK)) - return; - - inc_irq_stat(IRQEXT_CMC); - cpuhw = this_cpu_ptr(&cpu_cf_events); - - /* Measurement alerts are shared and might happen when the PMU - * is not reserved. Ignore these alerts in this case. */ - if (!(cpuhw->flags & PMU_F_RESERVED)) - return; - - /* counter authorization change alert */ - if (alert & CPU_MF_INT_CF_CACA) - qctri(&cpuhw->info); - - /* loss of counter data alert */ - if (alert & CPU_MF_INT_CF_LCDA) - pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); - - /* loss of MT counter data alert */ - if (alert & CPU_MF_INT_CF_MTDA) - pr_warn("CPU[%i] MT counter data was lost\n", - smp_processor_id()); - - /* store alert for special handling by in-kernel users */ - atomic64_or(alert, &cpuhw->alert); -} - -#define PMC_INIT 0 -#define PMC_RELEASE 1 -static void setup_pmc_cpu(void *flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - - switch (*((int *) flags)) { - case PMC_INIT: - memset(&cpuhw->info, 0, sizeof(cpuhw->info)); - qctri(&cpuhw->info); - cpuhw->flags |= PMU_F_RESERVED; - break; - - case PMC_RELEASE: - cpuhw->flags &= ~PMU_F_RESERVED; - break; - } - - /* Disable CPU counter sets */ - lcctl(0); -} - -bool kernel_cpumcf_avail(void) -{ - return cpum_cf_initalized; -} -EXPORT_SYMBOL(kernel_cpumcf_avail); - -/* Reserve/release functions for sharing perf hardware */ -static DEFINE_SPINLOCK(cpumcf_owner_lock); -static void *cpumcf_owner; - -/* Initialize the CPU-measurement counter facility */ -int __kernel_cpumcf_begin(void) -{ - int flags = PMC_INIT; - int err = 0; - - spin_lock(&cpumcf_owner_lock); - if (cpumcf_owner) - err = -EBUSY; - else - cpumcf_owner = __builtin_return_address(0); - spin_unlock(&cpumcf_owner_lock); - if (err) - return err; - - on_each_cpu(setup_pmc_cpu, &flags, 1); - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} -EXPORT_SYMBOL(__kernel_cpumcf_begin); - -/* Obtain the CPU-measurement alerts for the counter facility */ -unsigned long kernel_cpumcf_alert(int clear) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - unsigned long alert; - - alert = atomic64_read(&cpuhw->alert); - if (clear) - atomic64_set(&cpuhw->alert, 0); - - return alert; -} -EXPORT_SYMBOL(kernel_cpumcf_alert); - -/* Release the CPU-measurement counter facility */ -void __kernel_cpumcf_end(void) -{ - int flags = PMC_RELEASE; - - on_each_cpu(setup_pmc_cpu, &flags, 1); - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - spin_lock(&cpumcf_owner_lock); - cpumcf_owner = NULL; - spin_unlock(&cpumcf_owner_lock); -} -EXPORT_SYMBOL(__kernel_cpumcf_end); - /* Release the PMU if event is the last perf event */ static void hw_perf_event_destroy(struct perf_event *event) { @@ -672,59 +536,17 @@ static struct pmu cpumf_pmu = { .cancel_txn = cpumf_pmu_cancel_txn, }; -static int cpumf_pmf_setup(unsigned int cpu, int flags) -{ - local_irq_disable(); - setup_pmc_cpu(&flags); - local_irq_enable(); - return 0; -} - -static int s390_pmu_online_cpu(unsigned int cpu) -{ - return cpumf_pmf_setup(cpu, PMC_INIT); -} - -static int s390_pmu_offline_cpu(unsigned int cpu) -{ - return cpumf_pmf_setup(cpu, PMC_RELEASE); -} - static int __init cpumf_pmu_init(void) { int rc; - if (!cpum_cf_avail()) + if (!kernel_cpumcf_avail()) return -ENODEV; - /* clear bit 15 of cr0 to unauthorize problem-state to - * extract measurement counters */ - ctl_clear_bit(0, 48); - - /* register handler for measurement-alert interruptions */ - rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, - cpumf_measurement_alert); - if (rc) { - pr_err("Registering for CPU-measurement alerts " - "failed with rc=%i\n", rc); - return rc; - } - cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); - if (rc) { + if (rc) pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); - unregister_external_irq(EXT_IRQ_MEASURE_ALERT, - cpumf_measurement_alert); - return rc; - } - - rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, - "perf/s390/cf:online", - s390_pmu_online_cpu, s390_pmu_offline_cpu); - if (!rc) - cpum_cf_initalized = true; - return rc; } -early_initcall(cpumf_pmu_init); +subsys_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c new file mode 100644 index 000000000000..3bced89caffb --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_common.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CPU-Measurement Counter Facility Support - Common Layer + * + * Copyright IBM Corp. 2019 + * Author(s): Hendrik Brueckner + */ +#define KMSG_COMPONENT "cpum_cf_common" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Per-CPU event structure for the counter facility */ +DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { + .ctr_set = { + [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), + [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0), + }, + .alert = ATOMIC64_INIT(0), + .state = 0, + .flags = 0, + .txn_flags = 0, +}; +/* Indicator whether the CPU-Measurement Counter Facility Support is ready */ +static bool cpum_cf_initalized; + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_cf_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + inc_irq_stat(IRQEXT_CMC); + cpuhw = this_cpu_ptr(&cpu_cf_events); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + + /* loss of MT counter data alert */ + if (alert & CPU_MF_INT_CF_MTDA) + pr_warn("CPU[%i] MT counter data was lost\n", + smp_processor_id()); + + /* store alert for special handling by in-kernel users */ + atomic64_or(alert, &cpuhw->alert); +} + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void cpum_cf_setup_cpu(void *flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + + switch (*((int *) flags)) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); +} + +bool kernel_cpumcf_avail(void) +{ + return cpum_cf_initalized; +} +EXPORT_SYMBOL(kernel_cpumcf_avail); + + +/* Reserve/release functions for sharing perf hardware */ +static DEFINE_SPINLOCK(cpumcf_owner_lock); +static void *cpumcf_owner; + +/* Initialize the CPU-measurement counter facility */ +int __kernel_cpumcf_begin(void) +{ + int flags = PMC_INIT; + int err = 0; + + spin_lock(&cpumcf_owner_lock); + if (cpumcf_owner) + err = -EBUSY; + else + cpumcf_owner = __builtin_return_address(0); + spin_unlock(&cpumcf_owner_lock); + if (err) + return err; + + on_each_cpu(cpum_cf_setup_cpu, &flags, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} +EXPORT_SYMBOL(__kernel_cpumcf_begin); + +/* Obtain the CPU-measurement alerts for the counter facility */ +unsigned long kernel_cpumcf_alert(int clear) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + unsigned long alert; + + alert = atomic64_read(&cpuhw->alert); + if (clear) + atomic64_set(&cpuhw->alert, 0); + + return alert; +} +EXPORT_SYMBOL(kernel_cpumcf_alert); + +/* Release the CPU-measurement counter facility */ +void __kernel_cpumcf_end(void) +{ + int flags = PMC_RELEASE; + + on_each_cpu(cpum_cf_setup_cpu, &flags, 1); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + spin_lock(&cpumcf_owner_lock); + cpumcf_owner = NULL; + spin_unlock(&cpumcf_owner_lock); +} +EXPORT_SYMBOL(__kernel_cpumcf_end); + +static int cpum_cf_setup(unsigned int cpu, int flags) +{ + local_irq_disable(); + cpum_cf_setup_cpu(&flags); + local_irq_enable(); + return 0; +} + +static int cpum_cf_online_cpu(unsigned int cpu) +{ + return cpum_cf_setup(cpu, PMC_INIT); +} + +static int cpum_cf_offline_cpu(unsigned int cpu) +{ + return cpum_cf_setup(cpu, PMC_RELEASE); +} + +static int __init cpum_cf_init(void) +{ + int rc; + + if (!cpum_cf_avail()) + return -ENODEV; + + /* clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts " + "failed with rc=%i\n", rc); + return rc; + } + + rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, + "perf/s390/cf:online", + cpum_cf_online_cpu, cpum_cf_offline_cpu); + if (!rc) + cpum_cf_initalized = true; + + return rc; +} +early_initcall(cpum_cf_init); From 86c0b75715e711c035ae0ed9820ae95f14fe2c0d Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 26 Oct 2018 09:48:29 +0200 Subject: [PATCH 41/54] s390/cpum_cf: add ctr_stcctm() function Introduce the ctr_stcctm() function as wrapper function to extract counters from a particular counter set. Note that the counter set is part of the stcctm instruction opcode, few indirections are necessary to specify the counter set as variable. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 7121365a85d9..0c236d1a7aee 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -49,6 +49,25 @@ static inline void ctr_set_stop(u64 *state, int ctr_set) *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); } +static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) +{ + switch (set) { + case CPUMF_CTR_SET_BASIC: + return stcctm(BASIC, range, dest); + case CPUMF_CTR_SET_USER: + return stcctm(PROBLEM_STATE, range, dest); + case CPUMF_CTR_SET_CRYPTO: + return stcctm(CRYPTO_ACTIVITY, range, dest); + case CPUMF_CTR_SET_EXT: + return stcctm(EXTENDED, range, dest); + case CPUMF_CTR_SET_MT_DIAG: + return stcctm(MT_DIAG_CLEARING, range, dest); + case CPUMF_CTR_SET_MAX: + return 3; + } + return 3; +} + struct cpu_cf_events { struct cpumf_ctr_info info; atomic_t ctr_set[CPUMF_CTR_SET_MAX]; From fe5908bccc565f85cab025695627678cf257f91e Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 29 Oct 2018 13:16:38 +0000 Subject: [PATCH 42/54] s390/cpum_cf_diag: Add support for s390 counter facility diagnostic trace Introduce a PMU device named cpum_cf_diag. It extracts the values of all counters in all authorized counter sets and stores them as event raw data. This is done with the STORE CPU COUNTER MULTIPLE instruction to speed up access. All counter sets fit into one buffer. The values of each counter are taken when the event is started on the performance sub-system and when the event is stopped. This results in counter values available at the start and at the end of the measurement time frame. The difference is calculated for each counter. The differences of all counters are then saved as event raw data in the perf.data file. The counter values are accompanied by the time stamps when the counter set was started and when the counter set was stopped. This data is part of a trailer entry which describes the time frame, counter set version numbers, CPU speed, and machine type for later analysis. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu_mcf.h | 26 + arch/s390/include/asm/perf_event.h | 1 + arch/s390/kernel/Makefile | 1 + arch/s390/kernel/perf_cpum_cf_diag.c | 693 +++++++++++++++++++++++++++ 4 files changed, 721 insertions(+) create mode 100644 arch/s390/kernel/perf_cpum_cf_diag.c diff --git a/arch/s390/include/asm/cpu_mcf.h b/arch/s390/include/asm/cpu_mcf.h index 0c236d1a7aee..649b9fc60685 100644 --- a/arch/s390/include/asm/cpu_mcf.h +++ b/arch/s390/include/asm/cpu_mcf.h @@ -49,6 +49,26 @@ static inline void ctr_set_stop(u64 *state, int ctr_set) *state &= ~(cpumf_ctr_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT); } +static inline void ctr_set_multiple_enable(u64 *state, u64 ctrsets) +{ + *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; +} + +static inline void ctr_set_multiple_disable(u64 *state, u64 ctrsets) +{ + *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); +} + +static inline void ctr_set_multiple_start(u64 *state, u64 ctrsets) +{ + *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; +} + +static inline void ctr_set_multiple_stop(u64 *state, u64 ctrsets) +{ + *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); +} + static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) { switch (set) { @@ -97,4 +117,10 @@ static inline void kernel_cpumcf_end(void) preempt_enable(); } +/* Return true if store counter set multiple instruction is available */ +static inline int stccm_avail(void) +{ + return test_facility(142); +} + #endif /* _ASM_S390_CPU_MCF_H */ diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 70240961df74..560d8f766ddf 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -54,6 +54,7 @@ struct perf_sf_sde_regs { #define PERF_CPUM_SF_MAX_CTR 2 #define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */ #define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */ +#define PERF_EVENT_CPUM_CF_DIAG 0xBC000UL /* Event: Counter sets */ #define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */ #define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ #define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index ade87df0eb64..8a62c7f72e1b 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o +obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o obj-$(CONFIG_TRACEPOINTS) += trace.o diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c new file mode 100644 index 000000000000..c6fad208c2fa --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support for s390x - CPU-measurement Counter Sets + * + * Copyright IBM Corp. 2019 + * Author(s): Hendrik Brueckner + * Thomas Richer + */ +#define KMSG_COMPONENT "cpum_cf_diag" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ + +static unsigned int cf_diag_cpu_speed; +static debug_info_t *cf_diag_dbg; + +struct cf_diag_csd { /* Counter set data per CPU */ + size_t used; /* Bytes used in data/start */ + unsigned char start[PAGE_SIZE]; /* Counter set at event start */ + unsigned char data[PAGE_SIZE]; /* Counter set at event delete */ +}; +DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd); + +/* Counter sets are stored as data stream in a page sized memory buffer and + * exported to user space via raw data attached to the event sample data. + * Each counter set starts with an eight byte header consisting of: + * - a two byte eye catcher (0xfeef) + * - a one byte counter set number + * - a two byte counter set size (indicates the number of counters in this set) + * - a three byte reserved value (must be zero) to make the header the same + * size as a counter value. + * All counter values are eight byte in size. + * + * All counter sets are followed by a 64 byte trailer. + * The trailer consists of a: + * - flag field indicating valid fields when corresponding bit set + * - the counter facility first and second version number + * - the CPU speed if nonzero + * - the time stamp the counter sets have been collected + * - the time of day (TOD) base value + * - the machine type. + * + * The counter sets are saved when the process is prepared to be executed on a + * CPU and saved again when the process is going to be removed from a CPU. + * The difference of both counter sets are calculated and stored in the event + * sample data area. + */ + +struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int set:16; /* 16-31 Counter set identifier */ + unsigned int ctr:16; /* 32-47 Number of stored counters */ + unsigned int res1:16; /* 48-63 Reserved */ +}; + +struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ + /* 0 - 7 */ + union { + struct { + unsigned int clock_base:1; /* TOD clock base set */ + unsigned int speed:1; /* CPU speed set */ + /* Measurement alerts */ + unsigned int mtda:1; /* Loss of MT ctr. data alert */ + unsigned int caca:1; /* Counter auth. change alert */ + unsigned int lcda:1; /* Loss of counter data alert */ + }; + unsigned long flags; /* 0-63 All indicators */ + }; + /* 8 - 15 */ + unsigned int cfvn:16; /* 64-79 Ctr First Version */ + unsigned int csvn:16; /* 80-95 Ctr Second Version */ + unsigned int cpu_speed:32; /* 96-127 CPU speed */ + /* 16 - 23 */ + unsigned long timestamp; /* 128-191 Timestamp (TOD) */ + /* 24 - 55 */ + union { + struct { + unsigned long progusage1; + unsigned long progusage2; + unsigned long progusage3; + unsigned long tod_base; + }; + unsigned long progusage[4]; + }; + /* 56 - 63 */ + unsigned int mach_type:16; /* Machine type */ + unsigned int res1:16; /* Reserved */ + unsigned int res2:32; /* Reserved */ +}; + +/* Create the trailer data at the end of a page. */ +static void cf_diag_trailer(struct cf_trailer_entry *te) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpuid cpuid; + + te->cfvn = cpuhw->info.cfvn; /* Counter version numbers */ + te->csvn = cpuhw->info.csvn; + + get_cpu_id(&cpuid); /* Machine type */ + te->mach_type = cpuid.machine; + te->cpu_speed = cf_diag_cpu_speed; + if (te->cpu_speed) + te->speed = 1; + te->clock_base = 1; /* Save clock base */ + memcpy(&te->tod_base, &tod_clock_base[1], 8); + store_tod_clock((__u64 *)&te->timestamp); +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. + */ +static void cf_diag_enable(struct pmu *pmu) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + int err; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s pmu %p cpu %d flags %#x state %#llx\n", + __func__, pmu, smp_processor_id(), cpuhw->flags, + cpuhw->state); + if (cpuhw->flags & PMU_F_ENABLED) + return; + + err = lcctl(cpuhw->state); + if (err) { + pr_err("Enabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cf_diag_disable(struct pmu *pmu) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + u64 inactive; + int err; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s pmu %p cpu %d flags %#x state %#llx\n", + __func__, pmu, smp_processor_id(), cpuhw->flags, + cpuhw->state); + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + err = lcctl(inactive); + if (err) { + pr_err("Disabling the performance measuring unit " + "failed with rc=%x\n", err); + return; + } + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* Number of perf events counting hardware events */ +static atomic_t cf_diag_events = ATOMIC_INIT(0); + +/* Release the PMU if event is the last perf event */ +static void cf_diag_perf_event_destroy(struct perf_event *event) +{ + debug_sprintf_event(cf_diag_dbg, 5, + "%s event %p cpu %d cf_diag_events %d\n", + __func__, event, event->cpu, + atomic_read(&cf_diag_events)); + if (atomic_dec_return(&cf_diag_events) == 0) + __kernel_cpumcf_end(); +} + +/* Setup the event. Test for authorized counter sets and only include counter + * sets which are authorized at the time of the setup. Including unauthorized + * counter sets result in specification exception (and panic). + */ +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct perf_event_attr *attr = &event->attr; + enum cpumf_ctr_set i; + int err = 0; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s event %p cpu %d authorized %#x\n", __func__, + event, event->cpu, cpuhw->info.auth_ctl); + + event->hw.config = attr->config; + event->hw.config_base = 0; + local64_set(&event->count, 0); + + /* Add all authorized counter sets to config_base */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) + if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) + event->hw.config_base |= cpumf_ctr_ctl[i]; + + /* No authorized counter sets, nothing to count/sample */ + if (!event->hw.config_base) { + err = -EINVAL; + goto out; + } + + /* Set sample_period to indicate sampling */ + event->hw.sample_period = attr->sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + event->hw.last_period = event->hw.sample_period; +out: + debug_sprintf_event(cf_diag_dbg, 5, "%s err %d config_base %#lx\n", + __func__, err, event->hw.config_base); + return err; +} + +static int cf_diag_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + int err = -ENOENT; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s event %p cpu %d config %#llx " + "sample_type %#llx cf_diag_events %d\n", __func__, + event, event->cpu, attr->config, attr->sample_type, + atomic_read(&cf_diag_events)); + + if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || + event->attr.type != PERF_TYPE_RAW) + goto out; + + /* Raw events are used to access counters directly, + * hence do not permit excludes. + * This event is usesless without PERF_SAMPLE_RAW to return counter set + * values as raw data. + */ + if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || + !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { + err = -EOPNOTSUPP; + goto out; + } + + /* Initialize for using the CPU-measurement counter facility */ + if (atomic_inc_return(&cf_diag_events) == 1) { + if (__kernel_cpumcf_begin()) { + atomic_dec(&cf_diag_events); + err = -EBUSY; + goto out; + } + } + event->destroy = cf_diag_perf_event_destroy; + + err = __hw_perf_event_init(event); + if (unlikely(err)) + event->destroy(event); +out: + debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err); + return err; +} + +static void cf_diag_read(struct perf_event *event) +{ + debug_sprintf_event(cf_diag_dbg, 5, "%s event %p\n", __func__, event); +} + +/* Return the maximum possible counter set size (in number of 8 byte counters) + * depending on type and model number. + */ +static size_t cf_diag_ctrset_size(enum cpumf_ctr_set ctrset, + struct cpumf_ctr_info *info) +{ + size_t ctrset_size = 0; + + switch (ctrset) { + case CPUMF_CTR_SET_BASIC: + if (info->cfvn >= 1) + ctrset_size = 6; + break; + case CPUMF_CTR_SET_USER: + if (info->cfvn == 1) + ctrset_size = 6; + else if (info->cfvn >= 3) + ctrset_size = 2; + break; + case CPUMF_CTR_SET_CRYPTO: + ctrset_size = 16; + break; + case CPUMF_CTR_SET_EXT: + if (info->csvn == 1) + ctrset_size = 32; + else if (info->csvn == 2) + ctrset_size = 48; + else if (info->csvn >= 3) + ctrset_size = 128; + break; + case CPUMF_CTR_SET_MT_DIAG: + if (info->csvn > 3) + ctrset_size = 48; + break; + case CPUMF_CTR_SET_MAX: + break; + } + + return ctrset_size; +} + +/* Calculate memory needed to store all counter sets together with header and + * trailer data. This is independend of the counter set authorization which + * can vary depending on the configuration. + */ +static size_t cf_diag_ctrset_maxsize(struct cpumf_ctr_info *info) +{ + size_t max_size = sizeof(struct cf_trailer_entry); + enum cpumf_ctr_set i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + size_t size = cf_diag_ctrset_size(i, info); + + if (size) + max_size += size * sizeof(u64) + + sizeof(struct cf_ctrset_entry); + } + debug_sprintf_event(cf_diag_dbg, 5, "%s max_size %zu\n", __func__, + max_size); + + return max_size; +} + +/* Read a counter set. The counter set number determines which counter set and + * the CPUM-CF first and second version number determine the number of + * available counters in this counter set. + * Each counter set starts with header containing the counter set number and + * the number of 8 byte counters. + * + * The functions returns the number of bytes occupied by this counter set + * including the header. + * If there is no counter in the counter set, this counter set is useless and + * zero is returned on this case. + */ +static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, + size_t room) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + size_t ctrset_size, need = 0; + int rc = 3; /* Assume write failure */ + + ctrdata->def = CF_DIAG_CTRSET_DEF; + ctrdata->set = ctrset; + ctrdata->res1 = 0; + ctrset_size = cf_diag_ctrset_size(ctrset, &cpuhw->info); + + if (ctrset_size) { /* Save data */ + need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); + if (need <= room) + rc = ctr_stcctm(ctrset, ctrset_size, + (u64 *)(ctrdata + 1)); + if (rc != 3) + ctrdata->ctr = ctrset_size; + else + need = 0; + } + + debug_sprintf_event(cf_diag_dbg, 6, + "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" + " need %zd rc:%d\n", + __func__, ctrset, ctrset_size, cpuhw->info.cfvn, + cpuhw->info.csvn, need, rc); + return need; +} + +/* Read out all counter sets and save them in the provided data buffer. + * The last 64 byte host an artificial trailer entry. + */ +static size_t cf_diag_getctr(void *data, size_t sz, unsigned long auth) +{ + struct cf_trailer_entry *trailer; + size_t offset = 0, done; + int i; + + memset(data, 0, sz); + sz -= sizeof(*trailer); /* Always room for trailer */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + struct cf_ctrset_entry *ctrdata = data + offset; + + if (!(auth & cpumf_ctr_ctl[i])) + continue; /* Counter set not authorized */ + + done = cf_diag_getctrset(ctrdata, i, sz - offset); + offset += done; + debug_sprintf_event(cf_diag_dbg, 6, + "%s ctrset %d offset %zu done %zu\n", + __func__, i, offset, done); + } + trailer = data + offset; + cf_diag_trailer(trailer); + return offset + sizeof(*trailer); +} + +/* Calculate the difference for each counter in a counter set. */ +static void cf_diag_diffctrset(u64 *pstart, u64 *pstop, int counters) +{ + for (; --counters >= 0; ++pstart, ++pstop) + if (*pstop >= *pstart) + *pstop -= *pstart; + else + *pstop = *pstart - *pstop; +} + +/* Scan the counter sets and calculate the difference of each counter + * in each set. The result is the increment of each counter during the + * period the counter set has been activated. + * + * Return true on success. + */ +static int cf_diag_diffctr(struct cf_diag_csd *csd, unsigned long auth) +{ + struct cf_trailer_entry *trailer_start, *trailer_stop; + struct cf_ctrset_entry *ctrstart, *ctrstop; + size_t offset = 0; + + auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; + do { + ctrstart = (struct cf_ctrset_entry *)(csd->start + offset); + ctrstop = (struct cf_ctrset_entry *)(csd->data + offset); + + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { + pr_err("cpum_cf_diag counter set compare error " + "in set %i\n", ctrstart->set); + return 0; + } + auth &= ~cpumf_ctr_ctl[ctrstart->set]; + if (ctrstart->def == CF_DIAG_CTRSET_DEF) { + cf_diag_diffctrset((u64 *)(ctrstart + 1), + (u64 *)(ctrstop + 1), ctrstart->ctr); + offset += ctrstart->ctr * sizeof(u64) + + sizeof(*ctrstart); + } + debug_sprintf_event(cf_diag_dbg, 6, + "%s set %d ctr %d offset %zu auth %lx\n", + __func__, ctrstart->set, ctrstart->ctr, + offset, auth); + } while (ctrstart->def && auth); + + /* Save time_stamp from start of event in stop's trailer */ + trailer_start = (struct cf_trailer_entry *)(csd->start + offset); + trailer_stop = (struct cf_trailer_entry *)(csd->data + offset); + trailer_stop->progusage[0] = trailer_start->timestamp; + + return 1; +} + +/* Create perf event sample with the counter sets as raw data. The sample + * is then pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int cf_diag_push_sample(struct perf_event *event, + struct cf_diag_csd *csd) +{ + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + int overflow; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + + if (event->attr.sample_type & PERF_SAMPLE_CPU) + data.cpu_entry.cpu = event->cpu; + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = csd->used; + raw.frag.data = csd->data; + raw.size = csd->used; + data.raw = &raw; + } + + overflow = perf_event_overflow(event, &data, ®s); + debug_sprintf_event(cf_diag_dbg, 6, + "%s event %p cpu %d sample_type %#llx raw %d " + "ov %d\n", __func__, event, event->cpu, + event->attr.sample_type, raw.size, overflow); + if (overflow) + event->pmu->stop(event, 0); + + perf_event_update_userpage(event); + return overflow; +} + +static void cf_diag_start(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); + struct hw_perf_event *hwc = &event->hw; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s event %p cpu %d flags %#x hwc-state %#x\n", + __func__, event, event->cpu, flags, hwc->state); + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + /* (Re-)enable and activate all counter sets */ + lcctl(0); /* Reset counter sets */ + hwc->state = 0; + ctr_set_multiple_enable(&cpuhw->state, hwc->config_base); + lcctl(cpuhw->state); /* Enable counter sets */ + csd->used = cf_diag_getctr(csd->start, sizeof(csd->start), + event->hw.config_base); + ctr_set_multiple_start(&cpuhw->state, hwc->config_base); + /* Function cf_diag_enable() starts the counter sets. */ +} + +static void cf_diag_stop(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd); + struct hw_perf_event *hwc = &event->hw; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s event %p cpu %d flags %#x hwc-state %#x\n", + __func__, event, event->cpu, flags, hwc->state); + + /* Deactivate all counter sets */ + ctr_set_multiple_stop(&cpuhw->state, hwc->config_base); + local64_inc(&event->count); + csd->used = cf_diag_getctr(csd->data, sizeof(csd->data), + event->hw.config_base); + if (cf_diag_diffctr(csd, event->hw.config_base)) + cf_diag_push_sample(event, csd); + hwc->state |= PERF_HES_STOPPED; +} + +static int cf_diag_add(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + int err = 0; + + debug_sprintf_event(cf_diag_dbg, 5, + "%s event %p cpu %d flags %#x cpuhw:%p\n", + __func__, event, event->cpu, flags, cpuhw); + + if (cpuhw->flags & PMU_F_IN_USE) { + err = -EAGAIN; + goto out; + } + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + cpuhw->flags |= PMU_F_IN_USE; + if (flags & PERF_EF_START) + cf_diag_start(event, PERF_EF_RELOAD); +out: + debug_sprintf_event(cf_diag_dbg, 5, "%s err %d\n", __func__, err); + return err; +} + +static void cf_diag_del(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + + debug_sprintf_event(cf_diag_dbg, 5, + "%s event %p cpu %d flags %#x\n", + __func__, event, event->cpu, flags); + + cf_diag_stop(event, PERF_EF_UPDATE); + ctr_set_multiple_stop(&cpuhw->state, event->hw.config_base); + ctr_set_multiple_disable(&cpuhw->state, event->hw.config_base); + cpuhw->flags &= ~PMU_F_IN_USE; +} + +CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); + +static struct attribute *cf_diag_events_attr[] = { + CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cf_diag_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cf_diag_events_group = { + .name = "events", + .attrs = cf_diag_events_attr, +}; +static struct attribute_group cf_diag_format_group = { + .name = "format", + .attrs = cf_diag_format_attr, +}; +static const struct attribute_group *cf_diag_attr_groups[] = { + &cf_diag_events_group, + &cf_diag_format_group, + NULL, +}; + +/* Performance monitoring unit for s390x */ +static struct pmu cf_diag = { + .task_ctx_nr = perf_sw_context, + .pmu_enable = cf_diag_enable, + .pmu_disable = cf_diag_disable, + .event_init = cf_diag_event_init, + .add = cf_diag_add, + .del = cf_diag_del, + .start = cf_diag_start, + .stop = cf_diag_stop, + .read = cf_diag_read, + + .attr_groups = cf_diag_attr_groups +}; + +/* Get the CPU speed, try sampling facility first and CPU attributes second. */ +static void cf_diag_get_cpu_speed(void) +{ + if (cpum_sf_avail()) { /* Sampling facility first */ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) { + cf_diag_cpu_speed = si.cpu_speed; + return; + } + } + + if (test_facility(34)) { /* CPU speed extract static part */ + unsigned long mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + + if (mhz != -1UL) + cf_diag_cpu_speed = mhz & 0xffffffff; + } +} + +/* Initialize the counter set PMU to generate complete counter set data as + * event raw data. This relies on the CPU Measurement Counter Facility device + * already being loaded and initialized. + */ +static int __init cf_diag_init(void) +{ + struct cpumf_ctr_info info; + size_t need; + int rc; + + if (!kernel_cpumcf_avail() || !stccm_avail() || qctri(&info)) + return -ENODEV; + cf_diag_get_cpu_speed(); + + /* Make sure the counter set data fits into predefined buffer. */ + need = cf_diag_ctrset_maxsize(&info); + if (need > sizeof(((struct cf_diag_csd *)0)->start)) { + pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", + need); + return -ENOMEM; + } + + /* Setup s390dbf facility */ + cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); + if (!cf_diag_dbg) { + pr_err("Registration of s390dbf(cpum_cf_diag) failed\n"); + return -ENOMEM; + } + debug_register_view(cf_diag_dbg, &debug_sprintf_view); + + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW); + if (rc) { + debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); + debug_unregister(cf_diag_dbg); + pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", + rc); + } + return rc; +} +arch_initcall(cf_diag_init); From fb3a0b61e0d4e435016cc91575d051f841791da0 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 23 Jan 2019 13:05:11 +0000 Subject: [PATCH 43/54] s390/cpum_cf: Add kernel message exaplanations The CPU Measurement facility for counters and counter set rework adds a few new kernel messages to the system log. Add an explanation for some of these. Signed-off-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- Documentation/kmsg/s390/cpum_cf_common | 36 +++++++++++++++++++++ Documentation/kmsg/s390/cpum_cf_diag | 45 ++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 Documentation/kmsg/s390/cpum_cf_common create mode 100644 Documentation/kmsg/s390/cpum_cf_diag diff --git a/Documentation/kmsg/s390/cpum_cf_common b/Documentation/kmsg/s390/cpum_cf_common new file mode 100644 index 000000000000..c890a5dabdd6 --- /dev/null +++ b/Documentation/kmsg/s390/cpum_cf_common @@ -0,0 +1,36 @@ +/*? + * Text: "CPU[%i] Counter data was lost\n" + * Severity: Error + * Parameter: + * @1: CPU identifier + * Description: + * The kernel could not extract data provided by the Performance Measurement Unit + * (PMU) fast enough, some data is overwritten. + * User action: + * None + */ + +/*? + * Text: "CPU[%i] MT counter data was lost\n" + * Severity: Warning + * Parameter: + * @1: CPU identifier + * Description: + * The kernel could not extract data provided by the Performance Measurement Unit + * (PMU) fast enough, some data is overwritten. + * User action: + * None + */ + +/*? + * Text: "Registering for CPU-measurement alerts failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: Return code of failed interrupt handler registration + * Description: + * Please insert description here + * User action: + * Reboot. If the problem persists, gather Linux debug data and report + * the problem to your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/cpum_cf_diag b/Documentation/kmsg/s390/cpum_cf_diag new file mode 100644 index 000000000000..fd166f96ded8 --- /dev/null +++ b/Documentation/kmsg/s390/cpum_cf_diag @@ -0,0 +1,45 @@ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ +/*? Text: "cpum_cf_diag counter set compare error in set %i\n" */ +/*? Text: "Registration of s390dbf(cpum_cf_diag) failed\n" */ +/*? Text: "Registration of PMU(cpum_cf_diag) failed with rc=%i\n" */ + +/*? + * Text: "Enabling the performance measuring unit failed with rc=%x\n" + * Severity: Error + * Parameter: + * @1: Error code returned by PMU enable function + * Description: + * The CPU Measurement Facilitiy for counters needs to be enabled to extract + * counter data. + * User action: + * To use the CPU-measurement counter facility, authorize the LPAR for each + * counter set you want to use. Customize the LPAR activation profile and modify + * the Counter Facility Security Options. Then reboot. + * If the problem persists, gather Linux debug data and report the + * problem to your support organization. + */ + +/*? + * Text: "Disabling the performance measuring unit failed with rc=%x\n" + * Severity: Error + * Parameter: + * @1: Error code returned by PMU disable function + * Description: + * The CPU Measurement Facilitiy for counters is disabled to stop extraction + * of counter data. + * User action: + * Reboot. If the problem persists, gather Linux debug data and report the + * problem to your support organization. + */ + +/*? + * Text: "Insufficient memory for PMU(cpum_cf_diag) need=%zu\n" + * Severity: Error + * Parameter: + * @1: Required amount of bytes needed to store the counter sets. + * Description: + * The preallocted memory to store counter sets is too small. + * User action: + * If the problem persists, gather Linux debug data and report the + * problem to your support organization. + */ From 47b7478583aa0ba7b85a88d5ece78a21a7c846c5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 22 Jan 2019 10:55:18 +0000 Subject: [PATCH 44/54] s390/cpum_cf: Handle EBUSY return code from CPU counter facility reservation Rservation of the CPU Measurement Counter facility may fail if it is already in use by the cf_diag device driver. This is indicated by a non zero return code (-EBUSY). However this return code is ignored and the counter facility may be used in parallel by different device drivers. Handle the failing reservation and return an error to the caller. Signed-off-by: Thomas Richter Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 670ff98acb62..e1c54d28713a 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -197,7 +197,7 @@ static int __hw_perf_event_init(struct perf_event *event) struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; enum cpumf_ctr_set set; - int err; + int err = 0; u64 ev; switch (attr->type) { @@ -273,6 +273,8 @@ static int __hw_perf_event_init(struct perf_event *event) atomic_inc(&num_events); mutex_unlock(&pmc_reserve_mutex); } + if (err) + return err; event->destroy = hw_perf_event_destroy; /* Finally, validate version and authorization of the counter set */ From 36360658eb5a6cf04bb9f2704d1e4ce54037ec99 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Tue, 12 Feb 2019 16:53:45 +0100 Subject: [PATCH 45/54] s390: vfio_ap: link the vfio_ap devices to the vfio_ap bus subsystem Libudev relies on having a subsystem link for non-root devices. To avoid libudev (and potentially other userspace tools) choking on the matrix device let us introduce a matrix bus and with it the matrix bus subsytem. Also make the matrix device reside within the matrix bus. Doing this we remove the forced link from the matrix device to the vfio_ap driver and the device_type we do not need anymore. Since the associated matrix driver is not the vfio_ap driver any more, we have to change the search for the devices on the vfio_ap driver in the function vfio_ap_verify_queue_reserved. Fixes: 1fde573413b5 ("s390: vfio-ap: base implementation of VFIO AP device driver") Cc: stable@vger.kernel.org Reported-by: Marc Hartmayer Reported-by: Christian Borntraeger Signed-off-by: Pierre Morel Tested-by: Christian Borntraeger Reviewed-by: Cornelia Huck Reviewed-by: Tony Krowiak Acked-by: Halil Pasic Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/vfio_ap_drv.c | 44 +++++++++++++++++++++------ drivers/s390/crypto/vfio_ap_ops.c | 4 +-- drivers/s390/crypto/vfio_ap_private.h | 1 + 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index 31c6c847eaca..e9824c35c34f 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -15,7 +15,6 @@ #include "vfio_ap_private.h" #define VFIO_AP_ROOT_NAME "vfio_ap" -#define VFIO_AP_DEV_TYPE_NAME "ap_matrix" #define VFIO_AP_DEV_NAME "matrix" MODULE_AUTHOR("IBM Corporation"); @@ -24,10 +23,6 @@ MODULE_LICENSE("GPL v2"); static struct ap_driver vfio_ap_drv; -static struct device_type vfio_ap_dev_type = { - .name = VFIO_AP_DEV_TYPE_NAME, -}; - struct ap_matrix_dev *matrix_dev; /* Only type 10 adapters (CEX4 and later) are supported @@ -62,6 +57,22 @@ static void vfio_ap_matrix_dev_release(struct device *dev) kfree(matrix_dev); } +static int matrix_bus_match(struct device *dev, struct device_driver *drv) +{ + return 1; +} + +static struct bus_type matrix_bus = { + .name = "matrix", + .match = &matrix_bus_match, +}; + +static struct device_driver matrix_driver = { + .name = "vfio_ap", + .bus = &matrix_bus, + .suppress_bind_attrs = true, +}; + static int vfio_ap_matrix_dev_create(void) { int ret; @@ -71,6 +82,10 @@ static int vfio_ap_matrix_dev_create(void) if (IS_ERR(root_device)) return PTR_ERR(root_device); + ret = bus_register(&matrix_bus); + if (ret) + goto bus_register_err; + matrix_dev = kzalloc(sizeof(*matrix_dev), GFP_KERNEL); if (!matrix_dev) { ret = -ENOMEM; @@ -87,30 +102,41 @@ static int vfio_ap_matrix_dev_create(void) mutex_init(&matrix_dev->lock); INIT_LIST_HEAD(&matrix_dev->mdev_list); - matrix_dev->device.type = &vfio_ap_dev_type; dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME); matrix_dev->device.parent = root_device; + matrix_dev->device.bus = &matrix_bus; matrix_dev->device.release = vfio_ap_matrix_dev_release; - matrix_dev->device.driver = &vfio_ap_drv.driver; + matrix_dev->vfio_ap_drv = &vfio_ap_drv; ret = device_register(&matrix_dev->device); if (ret) goto matrix_reg_err; + ret = driver_register(&matrix_driver); + if (ret) + goto matrix_drv_err; + return 0; +matrix_drv_err: + device_unregister(&matrix_dev->device); matrix_reg_err: put_device(&matrix_dev->device); matrix_alloc_err: + bus_unregister(&matrix_bus); +bus_register_err: root_device_unregister(root_device); - return ret; } static void vfio_ap_matrix_dev_destroy(void) { + struct device *root_device = matrix_dev->device.parent; + + driver_unregister(&matrix_driver); device_unregister(&matrix_dev->device); - root_device_unregister(matrix_dev->device.parent); + bus_unregister(&matrix_bus); + root_device_unregister(root_device); } static int __init vfio_ap_init(void) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 272ef427dcc0..900b9cf20ca5 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -198,8 +198,8 @@ static int vfio_ap_verify_queue_reserved(unsigned long *apid, qres.apqi = apqi; qres.reserved = false; - ret = driver_for_each_device(matrix_dev->device.driver, NULL, &qres, - vfio_ap_has_queue); + ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL, + &qres, vfio_ap_has_queue); if (ret) return ret; diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index 5675492233c7..76b7f98e47e9 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -40,6 +40,7 @@ struct ap_matrix_dev { struct ap_config_info info; struct list_head mdev_list; struct mutex lock; + struct ap_driver *vfio_ap_drv; }; extern struct ap_matrix_dev *matrix_dev; From 48bd0eee8eca0920b6a15c4663e02ea434fe1fdf Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 22 Feb 2019 19:39:40 +0100 Subject: [PATCH 46/54] s390/cio: Fix vfio-ccw handling of recursive TICs The routine ccwchain_calc_length() is tasked with looking at a channel program, seeing how many CCWs are chained together by the presence of the Chain-Command flag, and returning a count to the caller. Previously, it also considered a Transfer-in-Channel CCW as being an appropriate mechanism for chaining. The problem at the time was that the TIC CCW will almost certainly not go to the next CCW in memory (because the CC flag would be sufficient), and so advancing to the next 8 bytes will cause us to read potentially invalid memory. So that comparison was removed, and the target of the TIC is processed as a new chain. This is fine when a TIC goes to a new chain (consider a NOP+TIC to a channel program that is being redriven), but there is another scenario where this falls apart. A TIC can be used to "rewind" a channel program, for example to find a particular record on a disk with various orientation CCWs. In this case, we DO want to consider the memory after the TIC since the TIC will be skipped once the requested criteria is met. This is due to the Status Modifier presented by the device, though software doesn't need to operate on it beyond understanding the behavior change of how the channel program is executed. So to handle this, we will re-introduce the check for a TIC CCW but limit it by examining the target of the TIC. If the TIC doesn't go back into the current chain, then current behavior applies; we should stop counting CCWs and let the target of the TIC be handled as a new chain. But, if the TIC DOES go back into the current chain, then we need to keep looking at the memory after the TIC for when the channel breaks out of the TIC loop. We can't use tic_target_chain_exists() because the chain in question hasn't been built yet, so we will redefine that comparison with some small functions to make it more readable and to permit refactoring later. Fixes: 405d566f98ae ("vfio-ccw: Don't assume there are more ccws after a TIC") Signed-off-by: Eric Farman Message-Id: <20190222183941.29596-2-farman@linux.ibm.com> Reviewed-by: Halil Pasic Reviewed-by: Farhan Ali Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 37 +++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index ba08fe137c2e..488b76cd6fb9 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -283,6 +283,33 @@ static long copy_ccw_from_iova(struct channel_program *cp, #define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC)) +/* + * is_cpa_within_range() + * + * @cpa: channel program address being questioned + * @head: address of the beginning of a CCW chain + * @len: number of CCWs within the chain + * + * Determine whether the address of a CCW (whether a new chain, + * or the target of a TIC) falls within a range (including the end points). + * + * Returns 1 if yes, 0 if no. + */ +static inline int is_cpa_within_range(u32 cpa, u32 head, int len) +{ + u32 tail = head + (len - 1) * sizeof(struct ccw1); + + return (head <= cpa && cpa <= tail); +} + +static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len) +{ + if (!ccw_is_tic(ccw)) + return 0; + + return is_cpa_within_range(ccw->cda, head, len); +} + static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len) { struct ccwchain *chain; @@ -392,7 +419,15 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) return -EOPNOTSUPP; } - if (!ccw_is_chain(ccw)) + /* + * We want to keep counting if the current CCW has the + * command-chaining flag enabled, or if it is a TIC CCW + * that loops back into the current chain. The latter + * is used for device orientation, where the CCW PRIOR to + * the TIC can either jump to the TIC or a CCW immediately + * after the TIC, depending on the results of its operation. + */ + if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt)) break; ccw++; From 2904337fd981217784c2820abdee537aa0c70330 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 22 Feb 2019 19:39:41 +0100 Subject: [PATCH 47/54] s390/cio: Use cpa range elsewhere within vfio-ccw Since we have a little function to see whether a channel program address falls within a range of CCWs, let's use it in the other places of code that make these checks. (Why isn't ccw_head fully removed? Well, because this way some longs lines don't have to be reflowed.) Signed-off-by: Eric Farman Message-Id: <20190222183941.29596-3-farman@linux.ibm.com> Reviewed-by: Farhan Ali Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 488b76cd6fb9..384b3987eeb4 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -443,13 +443,11 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp) { struct ccwchain *chain; - u32 ccw_head, ccw_tail; + u32 ccw_head; list_for_each_entry(chain, &cp->ccwchain_list, next) { ccw_head = chain->ch_iova; - ccw_tail = ccw_head + (chain->ch_len - 1) * sizeof(struct ccw1); - - if ((ccw_head <= tic->cda) && (tic->cda <= ccw_tail)) + if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len)) return 1; } @@ -516,13 +514,11 @@ static int ccwchain_fetch_tic(struct ccwchain *chain, { struct ccw1 *ccw = chain->ch_ccw + idx; struct ccwchain *iter; - u32 ccw_head, ccw_tail; + u32 ccw_head; list_for_each_entry(iter, &cp->ccwchain_list, next) { ccw_head = iter->ch_iova; - ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1); - - if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) { + if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) { ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) + (ccw->cda - ccw_head)); return 0; @@ -864,7 +860,7 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw) { struct ccwchain *chain; u32 cpa = scsw->cmd.cpa; - u32 ccw_head, ccw_tail; + u32 ccw_head; /* * LATER: @@ -874,9 +870,7 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw) */ list_for_each_entry(chain, &cp->ccwchain_list, next) { ccw_head = (u32)(u64)chain->ch_ccw; - ccw_tail = (u32)(u64)(chain->ch_ccw + chain->ch_len - 1); - - if ((ccw_head <= cpa) && (cpa <= ccw_tail)) { + if (is_cpa_within_range(cpa, ccw_head, chain->ch_len)) { /* * (cpa - ccw_head) is the offset value of the host * physical ccw to its chain head. From 96d3b64b527f6dc8799ebf00a95e931554311daf Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Feb 2019 16:26:22 +0100 Subject: [PATCH 48/54] s390/als: remove duplicated in-place implementation of stfle Reuse __stfle call instead of in-place implementation. __stfle is using memcpy and memset functions but they are safe to use, since mem.S is built with -march=z900. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/als.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index d592e0d90d9f..322e386bd9d0 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -105,20 +105,7 @@ void verify_facilities(void) { int i; - for (i = 0; i < ARRAY_SIZE(S390_lowcore.stfle_fac_list); i++) - S390_lowcore.stfle_fac_list[i] = 0; - asm volatile( - " stfl 0(0)\n" - : "=m" (S390_lowcore.stfl_fac_list)); - S390_lowcore.stfle_fac_list[0] = (u64)S390_lowcore.stfl_fac_list << 32; - if (S390_lowcore.stfl_fac_list & 0x01000000) { - register unsigned long reg0 asm("0") = ARRAY_SIZE(als) - 1; - - asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ - : "+d" (reg0) - : "a" (&S390_lowcore.stfle_fac_list) - : "memory", "cc"); - } + __stfle(S390_lowcore.stfle_fac_list, ARRAY_SIZE(S390_lowcore.stfle_fac_list)); for (i = 0; i < ARRAY_SIZE(als); i++) { if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) facility_mismatch(); From d8901f2b2d04841d75d62c28c18b8b6e57eb49de Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Feb 2019 12:49:01 +0100 Subject: [PATCH 49/54] s390: clean up redundant facilities list setup Facilities list in the lowcore is initially set up by verify_facilities from als.c and later initializations are redundant, so cleaning them up. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 2 -- arch/s390/mm/kasan_init.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index af5c2b3f7065..2871e98fdd0b 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -164,8 +164,6 @@ static noinline __init void setup_lowcore_early(void) static noinline __init void setup_facility_list(void) { - stfle(S390_lowcore.stfle_fac_list, - ARRAY_SIZE(S390_lowcore.stfle_fac_list)); memcpy(S390_lowcore.alt_stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(S390_lowcore.alt_stfle_fac_list)); diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index bac5c27d11fc..01892dcf4029 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -226,8 +226,6 @@ static void __init kasan_enable_dat(void) static void __init kasan_early_detect_facilities(void) { - __stfle(S390_lowcore.stfle_fac_list, - ARRAY_SIZE(S390_lowcore.stfle_fac_list)); if (test_facility(8)) { has_edat = true; __ctl_set_bit(0, 23); From b5e804598d594934407a1a8548d7b65341fe2617 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Feb 2019 16:52:42 +0100 Subject: [PATCH 50/54] s390: allow overriding facilities via command line Add "facilities=" command line option which allows to override facility bits returned by stfle. The main purpose of that is debugging aids which allows to test specific kernel behaviour depending on specific facilities presence. It also affects CPU alternatives. "facilities=" command line option format is comma separated list of integer values to be additionally set or cleared (if value is starting with "!"). Values ranges are also supported. e.g.: facilities=!130-160,159,167-169 Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/boot.h | 1 + arch/s390/boot/ipl_parm.c | 50 +++++++++++++++++++++++++++++++++++++-- arch/s390/boot/startup.c | 1 + 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index fc41e2277ea8..8665497fd335 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -6,6 +6,7 @@ void startup_kernel(void); void detect_memory(void); void store_ipl_parmblock(void); void setup_boot_command_line(void); +void parse_boot_command_line(void); void setup_memory_end(void); #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 9dab596be98e..94e5374c3630 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "boot.h" char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; @@ -143,8 +144,51 @@ void setup_boot_command_line(void) append_ipl_block_parm(); } +static void modify_facility(unsigned long nr, bool clear) +{ + if (clear) + __clear_facility(nr, S390_lowcore.stfle_fac_list); + else + __set_facility(nr, S390_lowcore.stfle_fac_list); +} + +static void modify_fac_list(char *str) +{ + unsigned long val, endval; + char *endp; + bool clear; + + while (*str) { + clear = false; + if (*str == '!') { + clear = true; + str++; + } + val = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + if (*str == '-') { + str++; + endval = simple_strtoull(str, &endp, 0); + if (str == endp) + break; + str = endp; + while (val <= endval) { + modify_facility(val, clear); + val++; + } + } else { + modify_facility(val, clear); + } + if (*str != ',') + break; + str++; + } +} + static char command_line_buf[COMMAND_LINE_SIZE] __section(.data); -static void parse_mem_opt(void) +void parse_boot_command_line(void) { char *param, *val; bool enabled; @@ -165,12 +209,14 @@ static void parse_mem_opt(void) if (!rc && !enabled) noexec_disabled = 1; } + + if (!strcmp(param, "facilities")) + modify_fac_list(val); } } void setup_memory_end(void) { - parse_mem_opt(); #ifdef CONFIG_CRASH_DUMP if (!OLDMEM_BASE && early_ipl_block_valid && early_ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP && diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 4d441317cdeb..bdfc5549a299 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -53,6 +53,7 @@ void startup_kernel(void) sclp_early_read_info(); store_ipl_parmblock(); setup_boot_command_line(); + parse_boot_command_line(); setup_memory_end(); detect_memory(); if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { From 6d85dac2ab9292956b5864daee8e11c4e158a566 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 27 Feb 2019 17:36:35 +0100 Subject: [PATCH 51/54] s390: warn about clearing als implied facilities Add a warning about removing required architecture level set facilities via "facilities=" command line option. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Martin Schwidefsky --- arch/s390/boot/als.c | 5 +++-- arch/s390/boot/boot.h | 1 + arch/s390/boot/ipl_parm.c | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index 322e386bd9d0..f902215e9cd9 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -7,6 +7,7 @@ #include #include #include +#include "boot.h" /* * The code within this file will be called very early. It may _not_ @@ -58,7 +59,7 @@ static void u16_to_decimal(char *str, u16 val) *str = '\0'; } -static void print_missing_facilities(void) +void print_missing_facilities(void) { static char als_str[80] = "Missing facilities: "; unsigned long val; @@ -90,7 +91,6 @@ static void print_missing_facilities(void) } strcat(als_str, "\n"); sclp_early_printk(als_str); - sclp_early_printk("See Principles of Operations for facility bits\n"); } static void facility_mismatch(void) @@ -98,6 +98,7 @@ static void facility_mismatch(void) sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); print_machine_type(); print_missing_facilities(); + sclp_early_printk("See Principles of Operations for facility bits\n"); disabled_wait(0x8badcccc); } diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 8665497fd335..82bc06346e05 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,5 +8,6 @@ void store_ipl_parmblock(void); void setup_boot_command_line(void); void parse_boot_command_line(void); void setup_memory_end(void); +void print_missing_facilities(void); #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 94e5374c3630..36beb56de021 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -152,6 +153,20 @@ static void modify_facility(unsigned long nr, bool clear) __set_facility(nr, S390_lowcore.stfle_fac_list); } +static void check_cleared_facilities(void) +{ + unsigned long als[] = { FACILITIES_ALS }; + int i; + + for (i = 0; i < ARRAY_SIZE(als); i++) { + if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) { + sclp_early_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + print_missing_facilities(); + break; + } + } +} + static void modify_fac_list(char *str) { unsigned long val, endval; @@ -185,6 +200,7 @@ static void modify_fac_list(char *str) break; str++; } + check_cleared_facilities(); } static char command_line_buf[COMMAND_LINE_SIZE] __section(.data); From c8e8ed386a5191d8011d32b5663da10320e7d552 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 1 Mar 2019 15:28:09 +0100 Subject: [PATCH 52/54] s390/suspend: fix prefix register reset in swsusp_arch_resume The reset of the prefix to zero in swsusp_arch_resume uses a 4 byte stack slot. With CONFIG_VMAP_STACK=y this is now in the vmalloc area, this works only with DAT enabled. Move the DAT disable in swsusp_arch_resume after the prefix reset. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/swsusp.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index 537f97fde37f..2f997eb70e18 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -124,13 +124,13 @@ ENTRY(swsusp_arch_resume) lghi %r2,1 brasl %r14,arch_set_page_states - /* Deactivate DAT */ - stnsm __SF_EMPTY(%r15),0xfb - /* Set prefix page to zero */ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) spx __SF_EMPTY(%r15) + /* Deactivate DAT */ + stnsm __SF_EMPTY(%r15),0xfb + /* Restore saved image */ larl %r1,restore_pblist lg %r1,0(%r1) From 9fe567d09f0f061b7776859fb01c31f89044578f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 1 Mar 2019 16:03:47 +0100 Subject: [PATCH 53/54] s390/dasd: fix read device characteristic with CONFIG_VMAP_STACK=y The dasd_eckd_restore_device() function calls dasd_generic_read_dev_chars with a temporary buffer on the stack. With CONFIG_VMAP_STACK=y this is a vmalloc address but dasd_generic_restore_device() uses the address of the buffer as I/O address. Circumvent this by using the already allocated cqr->data buffer for the RDC data. Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 397af07e4d88..e03304fe25bb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3965,13 +3965,11 @@ int dasd_generic_restore_device(struct ccw_device *cdev) EXPORT_SYMBOL_GPL(dasd_generic_restore_device); static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, - void *rdc_buffer, int rdc_buffer_size, int magic) { struct dasd_ccw_req *cqr; struct ccw1 *ccw; - unsigned long *idaw; cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device, NULL); @@ -3986,16 +3984,8 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, ccw = cqr->cpaddr; ccw->cmd_code = CCW_CMD_RDC; - if (idal_is_needed(rdc_buffer, rdc_buffer_size)) { - idaw = (unsigned long *) (cqr->data); - ccw->cda = (__u32)(addr_t) idaw; - ccw->flags = CCW_FLAG_IDA; - idaw = idal_create_words(idaw, rdc_buffer, rdc_buffer_size); - } else { - ccw->cda = (__u32)(addr_t) rdc_buffer; - ccw->flags = 0; - } - + ccw->cda = (__u32)(addr_t) cqr->data; + ccw->flags = 0; ccw->count = rdc_buffer_size; cqr->startdev = device; cqr->memdev = device; @@ -4013,12 +4003,13 @@ int dasd_generic_read_dev_chars(struct dasd_device *device, int magic, int ret; struct dasd_ccw_req *cqr; - cqr = dasd_generic_build_rdc(device, rdc_buffer, rdc_buffer_size, - magic); + cqr = dasd_generic_build_rdc(device, rdc_buffer_size, magic); if (IS_ERR(cqr)) return PTR_ERR(cqr); ret = dasd_sleep_on(cqr); + if (ret == 0) + memcpy(rdc_buffer, cqr->data, rdc_buffer_size); dasd_sfree_request(cqr, cqr->memdev); return ret; } From fcc082f35c6d565d351b5b89bb03a82333e9ffe8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 4 Mar 2019 08:25:00 +0100 Subject: [PATCH 54/54] Revert "s390/cpum_cf: Add kernel message exaplanations" This reverts commit fb3a0b61e0d4e435016cc91575d051f841791da0. Signed-off-by: Martin Schwidefsky --- Documentation/kmsg/s390/cpum_cf_common | 36 --------------------- Documentation/kmsg/s390/cpum_cf_diag | 45 -------------------------- 2 files changed, 81 deletions(-) delete mode 100644 Documentation/kmsg/s390/cpum_cf_common delete mode 100644 Documentation/kmsg/s390/cpum_cf_diag diff --git a/Documentation/kmsg/s390/cpum_cf_common b/Documentation/kmsg/s390/cpum_cf_common deleted file mode 100644 index c890a5dabdd6..000000000000 --- a/Documentation/kmsg/s390/cpum_cf_common +++ /dev/null @@ -1,36 +0,0 @@ -/*? - * Text: "CPU[%i] Counter data was lost\n" - * Severity: Error - * Parameter: - * @1: CPU identifier - * Description: - * The kernel could not extract data provided by the Performance Measurement Unit - * (PMU) fast enough, some data is overwritten. - * User action: - * None - */ - -/*? - * Text: "CPU[%i] MT counter data was lost\n" - * Severity: Warning - * Parameter: - * @1: CPU identifier - * Description: - * The kernel could not extract data provided by the Performance Measurement Unit - * (PMU) fast enough, some data is overwritten. - * User action: - * None - */ - -/*? - * Text: "Registering for CPU-measurement alerts failed with rc=%i\n" - * Severity: Error - * Parameter: - * @1: Return code of failed interrupt handler registration - * Description: - * Please insert description here - * User action: - * Reboot. If the problem persists, gather Linux debug data and report - * the problem to your support organization. - */ -/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/cpum_cf_diag b/Documentation/kmsg/s390/cpum_cf_diag deleted file mode 100644 index fd166f96ded8..000000000000 --- a/Documentation/kmsg/s390/cpum_cf_diag +++ /dev/null @@ -1,45 +0,0 @@ -/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ -/*? Text: "cpum_cf_diag counter set compare error in set %i\n" */ -/*? Text: "Registration of s390dbf(cpum_cf_diag) failed\n" */ -/*? Text: "Registration of PMU(cpum_cf_diag) failed with rc=%i\n" */ - -/*? - * Text: "Enabling the performance measuring unit failed with rc=%x\n" - * Severity: Error - * Parameter: - * @1: Error code returned by PMU enable function - * Description: - * The CPU Measurement Facilitiy for counters needs to be enabled to extract - * counter data. - * User action: - * To use the CPU-measurement counter facility, authorize the LPAR for each - * counter set you want to use. Customize the LPAR activation profile and modify - * the Counter Facility Security Options. Then reboot. - * If the problem persists, gather Linux debug data and report the - * problem to your support organization. - */ - -/*? - * Text: "Disabling the performance measuring unit failed with rc=%x\n" - * Severity: Error - * Parameter: - * @1: Error code returned by PMU disable function - * Description: - * The CPU Measurement Facilitiy for counters is disabled to stop extraction - * of counter data. - * User action: - * Reboot. If the problem persists, gather Linux debug data and report the - * problem to your support organization. - */ - -/*? - * Text: "Insufficient memory for PMU(cpum_cf_diag) need=%zu\n" - * Severity: Error - * Parameter: - * @1: Required amount of bytes needed to store the counter sets. - * Description: - * The preallocted memory to store counter sets is too small. - * User action: - * If the problem persists, gather Linux debug data and report the - * problem to your support organization. - */