OpenCloudOS-Kernel/arch/powerpc/include/asm/unistd.h

435 lines
12 KiB
C
Raw Normal View History

#ifndef _ASM_POWERPC_UNISTD_H_
#define _ASM_POWERPC_UNISTD_H_
/*
* This file contains the system call numbers.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define __NR_restart_syscall 0
#define __NR_exit 1
#define __NR_fork 2
#define __NR_read 3
#define __NR_write 4
#define __NR_open 5
#define __NR_close 6
#define __NR_waitpid 7
#define __NR_creat 8
#define __NR_link 9
#define __NR_unlink 10
#define __NR_execve 11
#define __NR_chdir 12
#define __NR_time 13
#define __NR_mknod 14
#define __NR_chmod 15
#define __NR_lchown 16
#define __NR_break 17
#define __NR_oldstat 18
#define __NR_lseek 19
#define __NR_getpid 20
#define __NR_mount 21
#define __NR_umount 22
#define __NR_setuid 23
#define __NR_getuid 24
#define __NR_stime 25
#define __NR_ptrace 26
#define __NR_alarm 27
#define __NR_oldfstat 28
#define __NR_pause 29
#define __NR_utime 30
#define __NR_stty 31
#define __NR_gtty 32
#define __NR_access 33
#define __NR_nice 34
#define __NR_ftime 35
#define __NR_sync 36
#define __NR_kill 37
#define __NR_rename 38
#define __NR_mkdir 39
#define __NR_rmdir 40
#define __NR_dup 41
#define __NR_pipe 42
#define __NR_times 43
#define __NR_prof 44
#define __NR_brk 45
#define __NR_setgid 46
#define __NR_getgid 47
#define __NR_signal 48
#define __NR_geteuid 49
#define __NR_getegid 50
#define __NR_acct 51
#define __NR_umount2 52
#define __NR_lock 53
#define __NR_ioctl 54
#define __NR_fcntl 55
#define __NR_mpx 56
#define __NR_setpgid 57
#define __NR_ulimit 58
#define __NR_oldolduname 59
#define __NR_umask 60
#define __NR_chroot 61
#define __NR_ustat 62
#define __NR_dup2 63
#define __NR_getppid 64
#define __NR_getpgrp 65
#define __NR_setsid 66
#define __NR_sigaction 67
#define __NR_sgetmask 68
#define __NR_ssetmask 69
#define __NR_setreuid 70
#define __NR_setregid 71
#define __NR_sigsuspend 72
#define __NR_sigpending 73
#define __NR_sethostname 74
#define __NR_setrlimit 75
#define __NR_getrlimit 76
#define __NR_getrusage 77
#define __NR_gettimeofday 78
#define __NR_settimeofday 79
#define __NR_getgroups 80
#define __NR_setgroups 81
#define __NR_select 82
#define __NR_symlink 83
#define __NR_oldlstat 84
#define __NR_readlink 85
#define __NR_uselib 86
#define __NR_swapon 87
#define __NR_reboot 88
#define __NR_readdir 89
#define __NR_mmap 90
#define __NR_munmap 91
#define __NR_truncate 92
#define __NR_ftruncate 93
#define __NR_fchmod 94
#define __NR_fchown 95
#define __NR_getpriority 96
#define __NR_setpriority 97
#define __NR_profil 98
#define __NR_statfs 99
#define __NR_fstatfs 100
#define __NR_ioperm 101
#define __NR_socketcall 102
#define __NR_syslog 103
#define __NR_setitimer 104
#define __NR_getitimer 105
#define __NR_stat 106
#define __NR_lstat 107
#define __NR_fstat 108
#define __NR_olduname 109
#define __NR_iopl 110
#define __NR_vhangup 111
#define __NR_idle 112
#define __NR_vm86 113
#define __NR_wait4 114
#define __NR_swapoff 115
#define __NR_sysinfo 116
#define __NR_ipc 117
#define __NR_fsync 118
#define __NR_sigreturn 119
#define __NR_clone 120
#define __NR_setdomainname 121
#define __NR_uname 122
#define __NR_modify_ldt 123
#define __NR_adjtimex 124
#define __NR_mprotect 125
#define __NR_sigprocmask 126
#define __NR_create_module 127
#define __NR_init_module 128
#define __NR_delete_module 129
#define __NR_get_kernel_syms 130
#define __NR_quotactl 131
#define __NR_getpgid 132
#define __NR_fchdir 133
#define __NR_bdflush 134
#define __NR_sysfs 135
#define __NR_personality 136
#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
#define __NR_setfsuid 138
#define __NR_setfsgid 139
#define __NR__llseek 140
#define __NR_getdents 141
#define __NR__newselect 142
#define __NR_flock 143
#define __NR_msync 144
#define __NR_readv 145
#define __NR_writev 146
#define __NR_getsid 147
#define __NR_fdatasync 148
#define __NR__sysctl 149
#define __NR_mlock 150
#define __NR_munlock 151
#define __NR_mlockall 152
#define __NR_munlockall 153
#define __NR_sched_setparam 154
#define __NR_sched_getparam 155
#define __NR_sched_setscheduler 156
#define __NR_sched_getscheduler 157
#define __NR_sched_yield 158
#define __NR_sched_get_priority_max 159
#define __NR_sched_get_priority_min 160
#define __NR_sched_rr_get_interval 161
#define __NR_nanosleep 162
#define __NR_mremap 163
#define __NR_setresuid 164
#define __NR_getresuid 165
#define __NR_query_module 166
#define __NR_poll 167
#define __NR_nfsservctl 168
#define __NR_setresgid 169
#define __NR_getresgid 170
#define __NR_prctl 171
#define __NR_rt_sigreturn 172
#define __NR_rt_sigaction 173
#define __NR_rt_sigprocmask 174
#define __NR_rt_sigpending 175
#define __NR_rt_sigtimedwait 176
#define __NR_rt_sigqueueinfo 177
#define __NR_rt_sigsuspend 178
#define __NR_pread64 179
#define __NR_pwrite64 180
#define __NR_chown 181
#define __NR_getcwd 182
#define __NR_capget 183
#define __NR_capset 184
#define __NR_sigaltstack 185
#define __NR_sendfile 186
#define __NR_getpmsg 187 /* some people actually want streams */
#define __NR_putpmsg 188 /* some people actually want streams */
#define __NR_vfork 189
#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */
#define __NR_readahead 191
#ifndef __powerpc64__ /* these are 32-bit only */
#define __NR_mmap2 192
#define __NR_truncate64 193
#define __NR_ftruncate64 194
#define __NR_stat64 195
#define __NR_lstat64 196
#define __NR_fstat64 197
#endif
#define __NR_pciconfig_read 198
#define __NR_pciconfig_write 199
#define __NR_pciconfig_iobase 200
#define __NR_multiplexer 201
#define __NR_getdents64 202
#define __NR_pivot_root 203
#ifndef __powerpc64__
#define __NR_fcntl64 204
#endif
#define __NR_madvise 205
#define __NR_mincore 206
#define __NR_gettid 207
#define __NR_tkill 208
#define __NR_setxattr 209
#define __NR_lsetxattr 210
#define __NR_fsetxattr 211
#define __NR_getxattr 212
#define __NR_lgetxattr 213
#define __NR_fgetxattr 214
#define __NR_listxattr 215
#define __NR_llistxattr 216
#define __NR_flistxattr 217
#define __NR_removexattr 218
#define __NR_lremovexattr 219
#define __NR_fremovexattr 220
#define __NR_futex 221
#define __NR_sched_setaffinity 222
#define __NR_sched_getaffinity 223
/* 224 currently unused */
#define __NR_tuxcall 225
#ifndef __powerpc64__
#define __NR_sendfile64 226
#endif
#define __NR_io_setup 227
#define __NR_io_destroy 228
#define __NR_io_getevents 229
#define __NR_io_submit 230
#define __NR_io_cancel 231
#define __NR_set_tid_address 232
#define __NR_fadvise64 233
#define __NR_exit_group 234
#define __NR_lookup_dcookie 235
#define __NR_epoll_create 236
#define __NR_epoll_ctl 237
#define __NR_epoll_wait 238
#define __NR_remap_file_pages 239
#define __NR_timer_create 240
#define __NR_timer_settime 241
#define __NR_timer_gettime 242
#define __NR_timer_getoverrun 243
#define __NR_timer_delete 244
#define __NR_clock_settime 245
#define __NR_clock_gettime 246
#define __NR_clock_getres 247
#define __NR_clock_nanosleep 248
#define __NR_swapcontext 249
#define __NR_tgkill 250
#define __NR_utimes 251
#define __NR_statfs64 252
#define __NR_fstatfs64 253
#ifndef __powerpc64__
#define __NR_fadvise64_64 254
#endif
#define __NR_rtas 255
#define __NR_sys_debug_setcontext 256
/* Number 257 is reserved for vserver */
#define __NR_migrate_pages 258
#define __NR_mbind 259
#define __NR_get_mempolicy 260
#define __NR_set_mempolicy 261
#define __NR_mq_open 262
#define __NR_mq_unlink 263
#define __NR_mq_timedsend 264
#define __NR_mq_timedreceive 265
#define __NR_mq_notify 266
#define __NR_mq_getsetattr 267
#define __NR_kexec_load 268
#define __NR_add_key 269
#define __NR_request_key 270
#define __NR_keyctl 271
#define __NR_waitid 272
#define __NR_ioprio_set 273
#define __NR_ioprio_get 274
#define __NR_inotify_init 275
#define __NR_inotify_add_watch 276
#define __NR_inotify_rm_watch 277
#define __NR_spu_run 278
#define __NR_spu_create 279
#define __NR_pselect6 280
#define __NR_ppoll 281
#define __NR_unshare 282
#define __NR_splice 283
#define __NR_tee 284
#define __NR_vmsplice 285
#define __NR_openat 286
#define __NR_mkdirat 287
#define __NR_mknodat 288
#define __NR_fchownat 289
#define __NR_futimesat 290
#ifdef __powerpc64__
#define __NR_newfstatat 291
#else
#define __NR_fstatat64 291
#endif
#define __NR_unlinkat 292
#define __NR_renameat 293
#define __NR_linkat 294
#define __NR_symlinkat 295
#define __NR_readlinkat 296
#define __NR_fchmodat 297
#define __NR_faccessat 298
#define __NR_get_robust_list 299
#define __NR_set_robust_list 300
#define __NR_move_pages 301
#define __NR_getcpu 302
#define __NR_epoll_pwait 303
#define __NR_utimensat 304
#define __NR_signalfd 305
#define __NR_timerfd_create 306
#define __NR_eventfd 307
#define __NR_sync_file_range2 308
sys_fallocate() implementation on i386, x86_64 and powerpc fallocate() is a new system call being proposed here which will allow applications to preallocate space to any file(s) in a file system. Each file system implementation that wants to use this feature will need to support an inode operation called ->fallocate(). Applications can use this feature to avoid fragmentation to certain level and thus get faster access speed. With preallocation, applications also get a guarantee of space for particular file(s) - even if later the the system becomes full. Currently, glibc provides an interface called posix_fallocate() which can be used for similar cause. Though this has the advantage of working on all file systems, but it is quite slow (since it writes zeroes to each block that has to be preallocated). Without a doubt, file systems can do this more efficiently within the kernel, by implementing the proposed fallocate() system call. It is expected that posix_fallocate() will be modified to call this new system call first and incase the kernel/filesystem does not implement it, it should fall back to the current implementation of writing zeroes to the new blocks. ToDos: 1. Implementation on other architectures (other than i386, x86_64, and ppc). Patches for s390(x) and ia64 are already available from previous posts, but it was decided that they should be added later once fallocate is in the mainline. Hence not including those patches in this take. 2. Changes to glibc, a) to support fallocate() system call b) to make posix_fallocate() and posix_fallocate64() call fallocate() Signed-off-by: Amit Arora <aarora@in.ibm.com>
2007-07-18 09:42:44 +08:00
#define __NR_fallocate 309
[POWERPC] Provide a way to protect 4k subpages when using 64k pages Using 64k pages on 64-bit PowerPC systems makes life difficult for emulators that are trying to emulate an ISA, such as x86, which use a smaller page size, since the emulator can no longer use the MMU and the normal system calls for controlling page protections. Of course, the emulator can emulate the MMU by checking and possibly remapping the address for each memory access in software, but that is pretty slow. This provides a facility for such programs to control the access permissions on individual 4k sub-pages of 64k pages. The idea is that the emulator supplies an array of protection masks to apply to a specified range of virtual addresses. These masks are applied at the level where hardware PTEs are inserted into the hardware page table based on the Linux PTEs, so the Linux PTEs are not affected. Note that this new mechanism does not allow any access that would otherwise be prohibited; it can only prohibit accesses that would otherwise be allowed. This new facility is only available on 64-bit PowerPC and only when the kernel is configured for 64k pages. The masks are supplied using a new subpage_prot system call, which takes a starting virtual address and length, and a pointer to an array of protection masks in memory. The array has a 32-bit word per 64k page to be protected; each 32-bit word consists of 16 2-bit fields, for which 0 allows any access (that is otherwise allowed), 1 prevents write accesses, and 2 or 3 prevent any access. Implicit in this is that the regions of the address space that are protected are switched to use 4k hardware pages rather than 64k hardware pages (on machines with hardware 64k page support). In fact the whole process is switched to use 4k hardware pages when the subpage_prot system call is used, but this could be improved in future to switch only the affected segments. The subpage protection bits are stored in a 3 level tree akin to the page table tree. The top level of this tree is stored in a structure that is appended to the top level of the page table tree, i.e., the pgd array. Since it will often only be 32-bit addresses (below 4GB) that are protected, the pointers to the first four bottom level pages are also stored in this structure (each bottom level page contains the protection bits for 1GB of address space), so the protection bits for addresses below 4GB can be accessed with one fewer loads than those for higher addresses. Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-01-24 05:35:13 +08:00
#define __NR_subpage_prot 310
#define __NR_timerfd_settime 311
#define __NR_timerfd_gettime 312
#define __NR_signalfd4 313
#define __NR_eventfd2 314
#define __NR_epoll_create1 315
#define __NR_dup3 316
#define __NR_pipe2 317
#define __NR_inotify_init1 318
perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\<event\>/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian <eranian@google.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Acked-by: Paul Mackerras <paulus@samba.org> Reviewed-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: <linux-arch@vger.kernel.org> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-09-21 18:02:48 +08:00
#define __NR_perf_event_open 319
#define __NR_preadv 320
#define __NR_pwritev 321
#define __NR_rt_tgsigqueueinfo 322
#define __NR_fanotify_init 323
#define __NR_fanotify_mark 324
#define __NR_prlimit64 325
#define __NR_socket 326
#define __NR_bind 327
#define __NR_connect 328
#define __NR_listen 329
#define __NR_accept 330
#define __NR_getsockname 331
#define __NR_getpeername 332
#define __NR_socketpair 333
#define __NR_send 334
#define __NR_sendto 335
#define __NR_recv 336
#define __NR_recvfrom 337
#define __NR_shutdown 338
#define __NR_setsockopt 339
#define __NR_getsockopt 340
#define __NR_sendmsg 341
#define __NR_recvmsg 342
#define __NR_recvmmsg 343
#define __NR_accept4 344
#define __NR_name_to_handle_at 345
#define __NR_open_by_handle_at 346
#define __NR_clock_adjtime 347
#define __NR_syncfs 348
#define __NR_sendmmsg 349
ns: Wire up the setns system call 32bit and 64bit on x86 are tested and working. The rest I have looked at closely and I can't find any problems. setns is an easy system call to wire up. It just takes two ints so I don't expect any weird architecture porting problems. While doing this I have noticed that we have some architectures that are very slow to get new system calls. cris seems to be the slowest where the last system calls wired up were preadv and pwritev. avr32 is weird in that recvmmsg was wired up but never declared in unistd.h. frv is behind with perf_event_open being the last syscall wired up. On h8300 the last system call wired up was epoll_wait. On m32r the last system call wired up was fallocate. mn10300 has recvmmsg as the last system call wired up. The rest seem to at least have syncfs wired up which was new in the 2.6.39. v2: Most of the architecture support added by Daniel Lezcano <dlezcano@fr.ibm.com> v3: ported to v2.6.36-rc4 by: Eric W. Biederman <ebiederm@xmission.com> v4: Moved wiring up of the system call to another patch v5: ported to v2.6.39-rc6 v6: rebased onto parisc-next and net-next to avoid syscall conflicts. v7: ported to Linus's latest post 2.6.39 tree. >  arch/blackfin/include/asm/unistd.h     |    3 ++- >  arch/blackfin/mach-common/entry.S      |    1 + Acked-by: Mike Frysinger <vapier@gentoo.org> Oh - ia64 wiring looks good. Acked-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-28 10:28:27 +08:00
#define __NR_setns 350
Cross Memory Attach The basic idea behind cross memory attach is to allow MPI programs doing intra-node communication to do a single copy of the message rather than a double copy of the message via shared memory. The following patch attempts to achieve this by allowing a destination process, given an address and size from a source process, to copy memory directly from the source process into its own address space via a system call. There is also a symmetrical ability to copy from the current process's address space into a destination process's address space. - Use of /proc/pid/mem has been considered, but there are issues with using it: - Does not allow for specifying iovecs for both src and dest, assuming preadv or pwritev was implemented either the area read from or written to would need to be contiguous. - Currently mem_read allows only processes who are currently ptrace'ing the target and are still able to ptrace the target to read from the target. This check could possibly be moved to the open call, but its not clear exactly what race this restriction is stopping (reason appears to have been lost) - Having to send the fd of /proc/self/mem via SCM_RIGHTS on unix domain socket is a bit ugly from a userspace point of view, especially when you may have hundreds if not (eventually) thousands of processes that all need to do this with each other - Doesn't allow for some future use of the interface we would like to consider adding in the future (see below) - Interestingly reading from /proc/pid/mem currently actually involves two copies! (But this could be fixed pretty easily) As mentioned previously use of vmsplice instead was considered, but has problems. Since you need the reader and writer working co-operatively if the pipe is not drained then you block. Which requires some wrapping to do non blocking on the send side or polling on the receive. In all to all communication it requires ordering otherwise you can deadlock. And in the example of many MPI tasks writing to one MPI task vmsplice serialises the copying. There are some cases of MPI collectives where even a single copy interface does not get us the performance gain we could. For example in an MPI_Reduce rather than copy the data from the source we would like to instead use it directly in a mathops (say the reduce is doing a sum) as this would save us doing a copy. We don't need to keep a copy of the data from the source. I haven't implemented this, but I think this interface could in the future do all this through the use of the flags - eg could specify the math operation and type and the kernel rather than just copying the data would apply the specified operation between the source and destination and store it in the destination. Although we don't have a "second user" of the interface (though I've had some nibbles from people who may be interested in using it for intra process messaging which is not MPI). This interface is something which hardware vendors are already doing for their custom drivers to implement fast local communication. And so in addition to this being useful for OpenMPI it would mean the driver maintainers don't have to fix things up when the mm changes. There was some discussion about how much faster a true zero copy would go. Here's a link back to the email with some testing I did on that: http://marc.info/?l=linux-mm&m=130105930902915&w=2 There is a basic man page for the proposed interface here: http://ozlabs.org/~cyeoh/cma/process_vm_readv.txt This has been implemented for x86 and powerpc, other architecture should mainly (I think) just need to add syscall numbers for the process_vm_readv and process_vm_writev. There are 32 bit compatibility versions for 64-bit kernels. For arch maintainers there are some simple tests to be able to quickly verify that the syscalls are working correctly here: http://ozlabs.org/~cyeoh/cma/cma-test-20110718.tgz Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: James Morris <jmorris@namei.org> Cc: <linux-man@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-11-01 08:06:39 +08:00
#define __NR_process_vm_readv 351
#define __NR_process_vm_writev 352
#ifdef __KERNEL__
Cross Memory Attach The basic idea behind cross memory attach is to allow MPI programs doing intra-node communication to do a single copy of the message rather than a double copy of the message via shared memory. The following patch attempts to achieve this by allowing a destination process, given an address and size from a source process, to copy memory directly from the source process into its own address space via a system call. There is also a symmetrical ability to copy from the current process's address space into a destination process's address space. - Use of /proc/pid/mem has been considered, but there are issues with using it: - Does not allow for specifying iovecs for both src and dest, assuming preadv or pwritev was implemented either the area read from or written to would need to be contiguous. - Currently mem_read allows only processes who are currently ptrace'ing the target and are still able to ptrace the target to read from the target. This check could possibly be moved to the open call, but its not clear exactly what race this restriction is stopping (reason appears to have been lost) - Having to send the fd of /proc/self/mem via SCM_RIGHTS on unix domain socket is a bit ugly from a userspace point of view, especially when you may have hundreds if not (eventually) thousands of processes that all need to do this with each other - Doesn't allow for some future use of the interface we would like to consider adding in the future (see below) - Interestingly reading from /proc/pid/mem currently actually involves two copies! (But this could be fixed pretty easily) As mentioned previously use of vmsplice instead was considered, but has problems. Since you need the reader and writer working co-operatively if the pipe is not drained then you block. Which requires some wrapping to do non blocking on the send side or polling on the receive. In all to all communication it requires ordering otherwise you can deadlock. And in the example of many MPI tasks writing to one MPI task vmsplice serialises the copying. There are some cases of MPI collectives where even a single copy interface does not get us the performance gain we could. For example in an MPI_Reduce rather than copy the data from the source we would like to instead use it directly in a mathops (say the reduce is doing a sum) as this would save us doing a copy. We don't need to keep a copy of the data from the source. I haven't implemented this, but I think this interface could in the future do all this through the use of the flags - eg could specify the math operation and type and the kernel rather than just copying the data would apply the specified operation between the source and destination and store it in the destination. Although we don't have a "second user" of the interface (though I've had some nibbles from people who may be interested in using it for intra process messaging which is not MPI). This interface is something which hardware vendors are already doing for their custom drivers to implement fast local communication. And so in addition to this being useful for OpenMPI it would mean the driver maintainers don't have to fix things up when the mm changes. There was some discussion about how much faster a true zero copy would go. Here's a link back to the email with some testing I did on that: http://marc.info/?l=linux-mm&m=130105930902915&w=2 There is a basic man page for the proposed interface here: http://ozlabs.org/~cyeoh/cma/process_vm_readv.txt This has been implemented for x86 and powerpc, other architecture should mainly (I think) just need to add syscall numbers for the process_vm_readv and process_vm_writev. There are 32 bit compatibility versions for 64-bit kernels. For arch maintainers there are some simple tests to be able to quickly verify that the syscalls are working correctly here: http://ozlabs.org/~cyeoh/cma/cma-test-20110718.tgz Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: James Morris <jmorris@namei.org> Cc: <linux-man@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-11-01 08:06:39 +08:00
#define __NR_syscalls 353
#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/linkage.h>
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
#define __ARCH_WANT_SYS_GETHOSTNAME
Add generic sys_ipc wrapper Add a generic implementation of the ipc demultiplexer syscall. Except for s390 and sparc64 all implementations of the sys_ipc are nearly identical. There are slight differences in the types of the parameters, where mips and powerpc as the only 64-bit architectures with sys_ipc use unsigned long for the "third" argument as it gets casted to a pointer later, while it traditionally is an "int" like most other paramters. frv goes even further and uses unsigned long for all parameters execept for "ptr" which is a pointer type everywhere. The change from int to unsigned long for "third" and back to "int" for the others on frv should be fine due to the in-register calling conventions for syscalls (we already had a similar issue with the generic sys_ptrace), but I'd prefer to have the arch maintainers looks over this in details. Except for that h8300, m68k and m68knommu lack an impplementation of the semtimedop sub call which this patch adds, and various architectures have gets used - at least on i386 it seems superflous as the compat code on x86-64 and ia64 doesn't even bother to implement it. [akpm@linux-foundation.org: add sys_ipc to sys_ni.c] Signed-off-by: Christoph Hellwig <hch@lst.de> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Jeff Dike <jdike@addtoit.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Reviewed-by: H. Peter Anvin <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: James Morris <jmorris@namei.org> Cc: Andreas Schwab <schwab@linux-m68k.org> Acked-by: Jesper Nilsson <jesper.nilsson@axis.com> Acked-by: Russell King <rmk+kernel@arm.linux.org.uk> Acked-by: David Howells <dhowells@redhat.com> Acked-by: Kyle McMartin <kyle@mcmartin.ca> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-03-11 07:21:18 +08:00
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_SGETMASK
#define __ARCH_WANT_SYS_SIGNAL
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_SYS_UTIME
#define __ARCH_WANT_SYS_WAITPID
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
#define __ARCH_WANT_SYS_GETPGRP
#define __ARCH_WANT_SYS_LLSEEK
#define __ARCH_WANT_SYS_NICE
#define __ARCH_WANT_SYS_OLD_GETRLIMIT
#define __ARCH_WANT_SYS_OLD_UNAME
#define __ARCH_WANT_SYS_OLDUMOUNT
#define __ARCH_WANT_SYS_SIGPENDING
#define __ARCH_WANT_SYS_SIGPROCMASK
#define __ARCH_WANT_SYS_RT_SIGACTION
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
#ifdef CONFIG_PPC32
#define __ARCH_WANT_OLD_STAT
#endif
#ifdef CONFIG_PPC64
#define __ARCH_WANT_COMPAT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
#define __ARCH_WANT_SYS_NEWFSTATAT
#endif
/*
* "Conditional" syscalls
*/
#define cond_syscall(x) \
asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall")))
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_UNISTD_H_ */