[PATCH] x86: Add portable getcpu call
For NUMA optimization and some other algorithms it is useful to have a fast way to get the current CPU and node numbers in user space. x86-64 added a fast way to do this in a vsyscall. This adds a generic syscall for other architectures to make it a generic portable facility. I expect some of them will also implement it as a faster vsyscall. The cache is an optimization for the x86-64 vsyscall implementation. Since what the syscall returns is an approximation anyway and user space often wants very fast results, it can be cached for some time. The normal methods to get this information in user space are relatively slow. The vsyscall is in a better position to manage the cache because it has direct access to a fast time stamp (jiffies). For the generic syscall the cache doesn't help much, but the syscall still enforces a valid argument to keep programs portable. I only added an i386 syscall entry for now. Other architectures can follow as needed. AK: Also added some cleanups from Andrew Morton. Signed-off-by: Andi Kleen <ak@suse.de>
This commit is contained in:
parent
c08c820508
commit
3cfc348bf9
|
@ -317,3 +317,4 @@ ENTRY(sys_call_table)
|
||||||
.long sys_tee /* 315 */
|
.long sys_tee /* 315 */
|
||||||
.long sys_vmsplice
|
.long sys_vmsplice
|
||||||
.long sys_move_pages
|
.long sys_move_pages
|
||||||
|
.long sys_getcpu
|
||||||
|
|
|
@ -713,4 +713,5 @@ ia32_sys_call_table:
|
||||||
.quad sys_tee
|
.quad sys_tee
|
||||||
.quad compat_sys_vmsplice
|
.quad compat_sys_vmsplice
|
||||||
.quad compat_sys_move_pages
|
.quad compat_sys_move_pages
|
||||||
|
.quad sys_getcpu
|
||||||
ia32_syscall_end:
|
ia32_syscall_end:
|
||||||
|
|
|
@ -323,10 +323,11 @@
|
||||||
#define __NR_tee 315
|
#define __NR_tee 315
|
||||||
#define __NR_vmsplice 316
|
#define __NR_vmsplice 316
|
||||||
#define __NR_move_pages 317
|
#define __NR_move_pages 317
|
||||||
|
#define __NR_getcpu 318
|
||||||
|
|
||||||
#ifdef __KERNEL__
|
#ifdef __KERNEL__
|
||||||
|
|
||||||
#define NR_syscalls 318
|
#define NR_syscalls 319
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* user-visible error numbers are in the range -1 - -128: see
|
* user-visible error numbers are in the range -1 - -128: see
|
||||||
|
|
|
@ -53,6 +53,7 @@ struct mq_attr;
|
||||||
struct compat_stat;
|
struct compat_stat;
|
||||||
struct compat_timeval;
|
struct compat_timeval;
|
||||||
struct robust_list_head;
|
struct robust_list_head;
|
||||||
|
struct getcpu_cache;
|
||||||
|
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
#include <linux/aio_abi.h>
|
#include <linux/aio_abi.h>
|
||||||
|
@ -596,5 +597,6 @@ asmlinkage long sys_get_robust_list(int pid,
|
||||||
size_t __user *len_ptr);
|
size_t __user *len_ptr);
|
||||||
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
|
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
|
||||||
size_t len);
|
size_t len);
|
||||||
|
asmlinkage long sys_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *cache);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
31
kernel/sys.c
31
kernel/sys.c
|
@ -28,6 +28,7 @@
|
||||||
#include <linux/tty.h>
|
#include <linux/tty.h>
|
||||||
#include <linux/signal.h>
|
#include <linux/signal.h>
|
||||||
#include <linux/cn_proc.h>
|
#include <linux/cn_proc.h>
|
||||||
|
#include <linux/getcpu.h>
|
||||||
|
|
||||||
#include <linux/compat.h>
|
#include <linux/compat.h>
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
|
@ -2062,3 +2063,33 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * sys_getcpu - report the CPU and NUMA node the caller is running on
 * @cpup:  user pointer that receives the CPU number; may be NULL
 * @nodep: user pointer that receives the node of that CPU; may be NULL
 * @cache: optional user-space cache area; may be NULL
 *
 * The CPU number is sampled once with raw_smp_processor_id() and the node
 * is derived from that same sample with cpu_to_node(), so both outputs
 * describe the same CPU.  The value is only a snapshot taken at the time
 * of the call.
 *
 * This generic implementation does not use @cache for caching.  When it is
 * non-NULL its t0 and t1 fields are read, incremented and written back so
 * that an invalid pointer is detected here as well.
 *
 * Returns 0 on success, or -EFAULT if any user-space access failed.
 */
asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
			   struct getcpu_cache __user *cache)
{
	int err = 0;
	int cpu = raw_smp_processor_id();

	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	if (cache) {
		/*
		 * The cache is not needed for this implementation,
		 * but make sure user programs pass something
		 * valid. vsyscall implementations can instead make
		 * good use of the cache. Only use t0 and t1 because
		 * these are available in both 32bit and 64bit ABI (no
		 * need for a compat_getcpu). 32bit has enough
		 * padding
		 */
		unsigned long t0 = 0, t1 = 0;

		/*
		 * Fold every access result into err: ignoring them would
		 * let a bogus cache pointer pass unnoticed, defeating the
		 * validity check described above.
		 */
		err |= get_user(t0, &cache->t0);
		err |= get_user(t1, &cache->t1);
		t0++;
		t1++;
		err |= put_user(t0, &cache->t0);
		err |= put_user(t1, &cache->t1);
	}
	return err ? -EFAULT : 0;
}
|
||||||
|
|
Loading…
Reference in New Issue