2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* syscalls.h - Linux syscall interfaces (non-arch-specific)
|
|
|
|
*
|
|
|
|
* Copyright (c) 2004 Randy Dunlap
|
|
|
|
* Copyright (c) 2004 Open Source Development Labs
|
|
|
|
*
|
|
|
|
* This file is released under the GPLv2.
|
|
|
|
* See the file COPYING for more details.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _LINUX_SYSCALLS_H
|
|
|
|
#define _LINUX_SYSCALLS_H
|
|
|
|
|
|
|
|
struct epoll_event;
|
|
|
|
struct iattr;
|
|
|
|
struct inode;
|
|
|
|
struct iocb;
|
|
|
|
struct io_event;
|
|
|
|
struct iovec;
|
|
|
|
struct itimerspec;
|
|
|
|
struct itimerval;
|
|
|
|
struct kexec_segment;
|
|
|
|
struct linux_dirent;
|
|
|
|
struct linux_dirent64;
|
|
|
|
struct list_head;
|
|
|
|
struct msgbuf;
|
|
|
|
struct msghdr;
|
|
|
|
struct msqid_ds;
|
|
|
|
struct new_utsname;
|
|
|
|
struct nfsctl_arg;
|
|
|
|
struct __old_kernel_stat;
|
|
|
|
struct pollfd;
|
|
|
|
struct rlimit;
|
|
|
|
struct rusage;
|
|
|
|
struct sched_param;
|
|
|
|
struct semaphore;
|
|
|
|
struct sembuf;
|
|
|
|
struct shmid_ds;
|
|
|
|
struct sockaddr;
|
|
|
|
struct stat;
|
|
|
|
struct stat64;
|
|
|
|
struct statfs;
|
|
|
|
struct statfs64;
|
|
|
|
struct __sysctl_args;
|
|
|
|
struct sysinfo;
|
|
|
|
struct timespec;
|
|
|
|
struct timeval;
|
|
|
|
struct timex;
|
|
|
|
struct timezone;
|
|
|
|
struct tms;
|
|
|
|
struct utimbuf;
|
|
|
|
struct mq_attr;
|
2006-02-01 19:04:33 +08:00
|
|
|
struct compat_stat;
|
|
|
|
struct compat_timeval;
|
2006-05-23 22:46:40 +08:00
|
|
|
struct robust_list_head;
|
2006-09-26 16:52:28 +08:00
|
|
|
struct getcpu_cache;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/aio_abi.h>
|
|
|
|
#include <linux/capability.h>
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/sem.h>
|
|
|
|
#include <asm/siginfo.h>
|
|
|
|
#include <asm/signal.h>
|
|
|
|
#include <linux/quota.h>
|
|
|
|
#include <linux/key.h>
|
|
|
|
|
|
|
|
asmlinkage long sys_time(time_t __user *tloc);
|
|
|
|
asmlinkage long sys_stime(time_t __user *tptr);
|
|
|
|
asmlinkage long sys_gettimeofday(struct timeval __user *tv,
|
|
|
|
struct timezone __user *tz);
|
|
|
|
asmlinkage long sys_settimeofday(struct timeval __user *tv,
|
|
|
|
struct timezone __user *tz);
|
|
|
|
asmlinkage long sys_adjtimex(struct timex __user *txc_p);
|
|
|
|
|
|
|
|
asmlinkage long sys_times(struct tms __user *tbuf);
|
|
|
|
|
|
|
|
asmlinkage long sys_gettid(void);
|
|
|
|
asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp);
|
|
|
|
asmlinkage unsigned long sys_alarm(unsigned int seconds);
|
|
|
|
asmlinkage long sys_getpid(void);
|
|
|
|
asmlinkage long sys_getppid(void);
|
|
|
|
asmlinkage long sys_getuid(void);
|
|
|
|
asmlinkage long sys_geteuid(void);
|
|
|
|
asmlinkage long sys_getgid(void);
|
|
|
|
asmlinkage long sys_getegid(void);
|
|
|
|
asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid);
|
|
|
|
asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid);
|
|
|
|
asmlinkage long sys_getpgid(pid_t pid);
|
|
|
|
asmlinkage long sys_getpgrp(void);
|
|
|
|
asmlinkage long sys_getsid(pid_t pid);
|
|
|
|
asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist);
|
|
|
|
|
|
|
|
asmlinkage long sys_setregid(gid_t rgid, gid_t egid);
|
|
|
|
asmlinkage long sys_setgid(gid_t gid);
|
|
|
|
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid);
|
|
|
|
asmlinkage long sys_setuid(uid_t uid);
|
|
|
|
asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid);
|
|
|
|
asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid);
|
|
|
|
asmlinkage long sys_setfsuid(uid_t uid);
|
|
|
|
asmlinkage long sys_setfsgid(gid_t gid);
|
|
|
|
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid);
|
|
|
|
asmlinkage long sys_setsid(void);
|
|
|
|
asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist);
|
|
|
|
|
|
|
|
asmlinkage long sys_acct(const char __user *name);
|
|
|
|
asmlinkage long sys_capget(cap_user_header_t header,
|
|
|
|
cap_user_data_t dataptr);
|
|
|
|
asmlinkage long sys_capset(cap_user_header_t header,
|
|
|
|
const cap_user_data_t data);
|
|
|
|
asmlinkage long sys_personality(u_long personality);
|
|
|
|
|
|
|
|
asmlinkage long sys_sigpending(old_sigset_t __user *set);
|
|
|
|
asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
|
|
|
|
old_sigset_t __user *oset);
|
|
|
|
asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
|
|
|
|
asmlinkage long sys_setitimer(int which,
|
|
|
|
struct itimerval __user *value,
|
|
|
|
struct itimerval __user *ovalue);
|
|
|
|
asmlinkage long sys_timer_create(clockid_t which_clock,
|
|
|
|
struct sigevent __user *timer_event_spec,
|
|
|
|
timer_t __user * created_timer_id);
|
|
|
|
asmlinkage long sys_timer_gettime(timer_t timer_id,
|
|
|
|
struct itimerspec __user *setting);
|
|
|
|
asmlinkage long sys_timer_getoverrun(timer_t timer_id);
|
|
|
|
asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
|
|
|
|
const struct itimerspec __user *new_setting,
|
|
|
|
struct itimerspec __user *old_setting);
|
|
|
|
asmlinkage long sys_timer_delete(timer_t timer_id);
|
|
|
|
asmlinkage long sys_clock_settime(clockid_t which_clock,
|
|
|
|
const struct timespec __user *tp);
|
|
|
|
asmlinkage long sys_clock_gettime(clockid_t which_clock,
|
|
|
|
struct timespec __user *tp);
|
|
|
|
asmlinkage long sys_clock_getres(clockid_t which_clock,
|
|
|
|
struct timespec __user *tp);
|
|
|
|
asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
|
|
|
|
const struct timespec __user *rqtp,
|
|
|
|
struct timespec __user *rmtp);
|
|
|
|
|
|
|
|
asmlinkage long sys_nice(int increment);
|
|
|
|
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
|
|
|
|
struct sched_param __user *param);
|
|
|
|
asmlinkage long sys_sched_setparam(pid_t pid,
|
|
|
|
struct sched_param __user *param);
|
|
|
|
asmlinkage long sys_sched_getscheduler(pid_t pid);
|
|
|
|
asmlinkage long sys_sched_getparam(pid_t pid,
|
|
|
|
struct sched_param __user *param);
|
|
|
|
asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
|
|
|
|
unsigned long __user *user_mask_ptr);
|
|
|
|
asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
|
|
|
|
unsigned long __user *user_mask_ptr);
|
|
|
|
asmlinkage long sys_sched_yield(void);
|
|
|
|
asmlinkage long sys_sched_get_priority_max(int policy);
|
|
|
|
asmlinkage long sys_sched_get_priority_min(int policy);
|
|
|
|
asmlinkage long sys_sched_rr_get_interval(pid_t pid,
|
|
|
|
struct timespec __user *interval);
|
|
|
|
asmlinkage long sys_setpriority(int which, int who, int niceval);
|
|
|
|
asmlinkage long sys_getpriority(int which, int who);
|
|
|
|
|
|
|
|
asmlinkage long sys_shutdown(int, int);
|
|
|
|
asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd,
|
|
|
|
void __user *arg);
|
|
|
|
asmlinkage long sys_restart_syscall(void);
|
2005-06-26 05:58:28 +08:00
|
|
|
asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
|
|
|
|
struct kexec_segment __user *segments,
|
|
|
|
unsigned long flags);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
asmlinkage long sys_exit(int error_code);
|
|
|
|
asmlinkage void sys_exit_group(int error_code);
|
|
|
|
asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
|
|
|
|
int options, struct rusage __user *ru);
|
|
|
|
asmlinkage long sys_waitid(int which, pid_t pid,
|
|
|
|
struct siginfo __user *infop,
|
|
|
|
int options, struct rusage __user *ru);
|
|
|
|
asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options);
|
|
|
|
asmlinkage long sys_set_tid_address(int __user *tidptr);
|
[PATCH] pi-futex: futex code cleanups
We are pleased to announce "lightweight userspace priority inheritance" (PI)
support for futexes. The following patchset and glibc patch implements it,
on top of the robust-futexes patchset which is included in 2.6.16-mm1.
We are calling it lightweight for 3 reasons:
- in the user-space fastpath a PI-enabled futex involves no kernel work
(or any other PI complexity) at all. No registration, no extra kernel
calls - just pure fast atomic ops in userspace.
- in the slowpath (in the lock-contention case), the system call and
scheduling pattern is in fact better than that of normal futexes, due to
the 'integrated' nature of FUTEX_LOCK_PI. [more about that further down]
- the in-kernel PI implementation is streamlined around the mutex
abstraction, with strict rules that keep the implementation relatively
simple: only a single owner may own a lock (i.e. no read-write lock
support), only the owner may unlock a lock, no recursive locking, etc.
Priority Inheritance - why, oh why???
-------------------------------------
Many of you heard the horror stories about the evil PI code circling Linux for
years, which makes no real sense at all and is only used by buggy applications
and which has horrible overhead. Some of you have dreaded this very moment,
when someone actually submits working PI code ;-)
So why would we like to see PI support for futexes?
We'd like to see it done purely for technological reasons. We don't think it's
a buggy concept, we think it's useful functionality to offer to applications,
which functionality cannot be achieved in other ways. We also think it's the
right thing to do, and we think we've got the right arguments and the right
numbers to prove that. We also believe that we can address all the
counter-arguments as well. For these reasons (and the reasons outlined below)
we are submitting this patch-set for upstream kernel inclusion.
What are the benefits of PI?
The short reply:
----------------
User-space PI helps achieve/improve determinism for user-space
applications. In the best-case, it can help achieve determinism and
well-bound latencies. Even in the worst-case, PI will improve the statistical
distribution of locking related application delays.
The longer reply:
-----------------
Firstly, sharing locks between multiple tasks is a common programming
technique that often cannot be replaced with lockless algorithms. As we can
see it in the kernel [which is a quite complex program in itself], lockless
structures are rather the exception than the norm - the current ratio of
lockless vs. locky code for shared data structures is somewhere between 1:10
and 1:100. Lockless is hard, and the complexity of lockless algorithms often
endangers the ability to do robust reviews of said code. I.e. critical RT
apps often choose lock structures to protect critical data structures, instead
of lockless algorithms. Furthermore, there are cases (like shared hardware,
or other resource limits) where lockless access is mathematically impossible.
Media players (such as Jack) are an example of reasonable application design
with multiple tasks (with multiple priority levels) sharing short-held locks:
for example, a highprio audio playback thread is combined with medium-prio
construct-audio-data threads and low-prio display-colory-stuff threads. Add
video and decoding to the mix and we've got even more priority levels.
So once we accept that synchronization objects (locks) are an unavoidable fact
of life, and once we accept that multi-task userspace apps have a very fair
expectation of being able to use locks, we've got to think about how to offer
the option of a deterministic locking implementation to user-space.
Most of the technical counter-arguments against doing priority inheritance
only apply to kernel-space locks. But user-space locks are different, there
we cannot disable interrupts or make the task non-preemptible in a critical
section, so the 'use spinlocks' argument does not apply (user-space spinlocks
have the same priority inversion problems as other user-space locking
constructs). Fact is, pretty much the only technique that currently enables
good determinism for userspace locks (such as futex-based pthread mutexes) is
priority inheritance:
Currently (without PI), if a high-prio and a low-prio task share a lock [this
is a quite common scenario for most non-trivial RT applications], even if all
critical sections are coded carefully to be deterministic (i.e. all critical
sections are short in duration and only execute a limited number of
instructions), the kernel cannot guarantee any deterministic execution of the
high-prio task: any medium-priority task could preempt the low-prio task while
it holds the shared lock and executes the critical section, and could delay it
indefinitely.
Implementation:
---------------
As mentioned before, the userspace fastpath of PI-enabled pthread mutexes
involves no kernel work at all - they behave quite similarly to normal
futex-based locks: a 0 value means unlocked, and a value==TID means locked.
(This is the same method as used by list-based robust futexes.) Userspace uses
atomic ops to lock/unlock these mutexes without entering the kernel.
To handle the slowpath, we have added two new futex ops:
FUTEX_LOCK_PI
FUTEX_UNLOCK_PI
If the lock-acquire fastpath fails, [i.e. an atomic transition from 0 to TID
fails], then FUTEX_LOCK_PI is called. The kernel does all the remaining work:
if there is no futex-queue attached to the futex address yet then the code
looks up the task that owns the futex [it has put its own TID into the futex
value], and attaches a 'PI state' structure to the futex-queue. The pi_state
includes an rt-mutex, which is a PI-aware, kernel-based synchronization
object. The 'other' task is made the owner of the rt-mutex, and the
FUTEX_WAITERS bit is atomically set in the futex value. Then this task tries
to lock the rt-mutex, on which it blocks. Once it returns, it has the mutex
acquired, and it sets the futex value to its own TID and returns. Userspace
has no other work to perform - it now owns the lock, and futex value contains
FUTEX_WAITERS|TID.
If the unlock side fastpath succeeds, [i.e. userspace manages to do a TID ->
0 atomic transition of the futex value], then no kernel work is triggered.
If the unlock fastpath fails (because the FUTEX_WAITERS bit is set), then
FUTEX_UNLOCK_PI is called, and the kernel unlocks the futex on behalf of
userspace - and it also unlocks the attached pi_state->rt_mutex and thus wakes
up any potential waiters.
Note that under this approach, contrary to other PI-futex approaches, there is
no prior 'registration' of a PI-futex. [which is not quite possible anyway,
due to existing ABI properties of pthread mutexes.]
Also, under this scheme, 'robustness' and 'PI' are two orthogonal properties
of futexes, and all four combinations are possible: futex, robust-futex,
PI-futex, robust+PI-futex.
glibc support:
--------------
Ulrich Drepper and Jakub Jelinek have written glibc support for PI-futexes
(and robust futexes), enabling robust and PI (PTHREAD_PRIO_INHERIT) POSIX
mutexes. (PTHREAD_PRIO_PROTECT support will be added later on too, no
additional kernel changes are needed for that). [NOTE: The glibc patch is
obviously unofficial and unsupported without matching upstream kernel
functionality.]
the patch-queue and the glibc patch can also be downloaded from:
http://redhat.com/~mingo/PI-futex-patches/
Many thanks go to the people who helped us create this kernel feature: Steven
Rostedt, Esben Nielsen, Benedikt Spranger, Daniel Walker, John Cooper, Arjan
van de Ven, Oleg Nesterov and others. Credits for related prior projects go
to Dirk Grambow, Inaky Perez-Gonzalez, Bill Huey and many others.
Clean up the futex code, before adding more features to it:
- use u32 as the futex field type - that's the ABI
- use __user and pointers to u32 instead of unsigned long
- code style / comment style cleanups
- rename hash-bucket name from 'bh' to 'hb'.
I checked the pre and post futex.o object files to make sure this
patch has no code effects.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 17:54:47 +08:00
|
|
|
asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
|
2005-04-17 06:20:36 +08:00
|
|
|
struct timespec __user *utime, u32 __user *uaddr2,
|
[PATCH] pi-futex: futex code cleanups
We are pleased to announce "lightweight userspace priority inheritance" (PI)
support for futexes. The following patchset and glibc patch implements it,
on top of the robust-futexes patchset which is included in 2.6.16-mm1.
We are calling it lightweight for 3 reasons:
- in the user-space fastpath a PI-enabled futex involves no kernel work
(or any other PI complexity) at all. No registration, no extra kernel
calls - just pure fast atomic ops in userspace.
- in the slowpath (in the lock-contention case), the system call and
scheduling pattern is in fact better than that of normal futexes, due to
the 'integrated' nature of FUTEX_LOCK_PI. [more about that further down]
- the in-kernel PI implementation is streamlined around the mutex
abstraction, with strict rules that keep the implementation relatively
simple: only a single owner may own a lock (i.e. no read-write lock
support), only the owner may unlock a lock, no recursive locking, etc.
Priority Inheritance - why, oh why???
-------------------------------------
Many of you heard the horror stories about the evil PI code circling Linux for
years, which makes no real sense at all and is only used by buggy applications
and which has horrible overhead. Some of you have dreaded this very moment,
when someone actually submits working PI code ;-)
So why would we like to see PI support for futexes?
We'd like to see it done purely for technological reasons. We don't think it's
a buggy concept, we think it's useful functionality to offer to applications,
which functionality cannot be achieved in other ways. We also think it's the
right thing to do, and we think we've got the right arguments and the right
numbers to prove that. We also believe that we can address all the
counter-arguments as well. For these reasons (and the reasons outlined below)
we are submitting this patch-set for upstream kernel inclusion.
What are the benefits of PI?
The short reply:
----------------
User-space PI helps achieve/improve determinism for user-space
applications. In the best-case, it can help achieve determinism and
well-bound latencies. Even in the worst-case, PI will improve the statistical
distribution of locking related application delays.
The longer reply:
-----------------
Firstly, sharing locks between multiple tasks is a common programming
technique that often cannot be replaced with lockless algorithms. As we can
see it in the kernel [which is a quite complex program in itself], lockless
structures are rather the exception than the norm - the current ratio of
lockless vs. locky code for shared data structures is somewhere between 1:10
and 1:100. Lockless is hard, and the complexity of lockless algorithms often
endangers the ability to do robust reviews of said code. I.e. critical RT
apps often choose lock structures to protect critical data structures, instead
of lockless algorithms. Furthermore, there are cases (like shared hardware,
or other resource limits) where lockless access is mathematically impossible.
Media players (such as Jack) are an example of reasonable application design
with multiple tasks (with multiple priority levels) sharing short-held locks:
for example, a highprio audio playback thread is combined with medium-prio
construct-audio-data threads and low-prio display-colory-stuff threads. Add
video and decoding to the mix and we've got even more priority levels.
So once we accept that synchronization objects (locks) are an unavoidable fact
of life, and once we accept that multi-task userspace apps have a very fair
expectation of being able to use locks, we've got to think about how to offer
the option of a deterministic locking implementation to user-space.
Most of the technical counter-arguments against doing priority inheritance
only apply to kernel-space locks. But user-space locks are different, there
we cannot disable interrupts or make the task non-preemptible in a critical
section, so the 'use spinlocks' argument does not apply (user-space spinlocks
have the same priority inversion problems as other user-space locking
constructs). Fact is, pretty much the only technique that currently enables
good determinism for userspace locks (such as futex-based pthread mutexes) is
priority inheritance:
Currently (without PI), if a high-prio and a low-prio task share a lock [this
is a quite common scenario for most non-trivial RT applications], even if all
critical sections are coded carefully to be deterministic (i.e. all critical
sections are short in duration and only execute a limited number of
instructions), the kernel cannot guarantee any deterministic execution of the
high-prio task: any medium-priority task could preempt the low-prio task while
it holds the shared lock and executes the critical section, and could delay it
indefinitely.
Implementation:
---------------
As mentioned before, the userspace fastpath of PI-enabled pthread mutexes
involves no kernel work at all - they behave quite similarly to normal
futex-based locks: a 0 value means unlocked, and a value==TID means locked.
(This is the same method as used by list-based robust futexes.) Userspace uses
atomic ops to lock/unlock these mutexes without entering the kernel.
To handle the slowpath, we have added two new futex ops:
FUTEX_LOCK_PI
FUTEX_UNLOCK_PI
If the lock-acquire fastpath fails, [i.e. an atomic transition from 0 to TID
fails], then FUTEX_LOCK_PI is called. The kernel does all the remaining work:
if there is no futex-queue attached to the futex address yet then the code
looks up the task that owns the futex [it has put its own TID into the futex
value], and attaches a 'PI state' structure to the futex-queue. The pi_state
includes an rt-mutex, which is a PI-aware, kernel-based synchronization
object. The 'other' task is made the owner of the rt-mutex, and the
FUTEX_WAITERS bit is atomically set in the futex value. Then this task tries
to lock the rt-mutex, on which it blocks. Once it returns, it has the mutex
acquired, and it sets the futex value to its own TID and returns. Userspace
has no other work to perform - it now owns the lock, and futex value contains
FUTEX_WAITERS|TID.
If the unlock side fastpath succeeds, [i.e. userspace manages to do a TID ->
0 atomic transition of the futex value], then no kernel work is triggered.
If the unlock fastpath fails (because the FUTEX_WAITERS bit is set), then
FUTEX_UNLOCK_PI is called, and the kernel unlocks the futex on behalf of
userspace - and it also unlocks the attached pi_state->rt_mutex and thus wakes
up any potential waiters.
Note that under this approach, contrary to other PI-futex approaches, there is
no prior 'registration' of a PI-futex. [which is not quite possible anyway,
due to existing ABI properties of pthread mutexes.]
Also, under this scheme, 'robustness' and 'PI' are two orthogonal properties
of futexes, and all four combinations are possible: futex, robust-futex,
PI-futex, robust+PI-futex.
glibc support:
--------------
Ulrich Drepper and Jakub Jelinek have written glibc support for PI-futexes
(and robust futexes), enabling robust and PI (PTHREAD_PRIO_INHERIT) POSIX
mutexes. (PTHREAD_PRIO_PROTECT support will be added later on too, no
additional kernel changes are needed for that). [NOTE: The glibc patch is
obviously unofficial and unsupported without matching upstream kernel
functionality.]
the patch-queue and the glibc patch can also be downloaded from:
http://redhat.com/~mingo/PI-futex-patches/
Many thanks go to the people who helped us create this kernel feature: Steven
Rostedt, Esben Nielsen, Benedikt Spranger, Daniel Walker, John Cooper, Arjan
van de Ven, Oleg Nesterov and others. Credits for related prior projects go
to Dirk Grambow, Inaky Perez-Gonzalez, Bill Huey and many others.
Clean up the futex code, before adding more features to it:
- use u32 as the futex field type - that's the ABI
- use __user and pointers to u32 instead of unsigned long
- code style / comment style cleanups
- rename hash-bucket name from 'bh' to 'hb'.
I checked the pre and post futex.o object files to make sure this
patch has no code effects.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 17:54:47 +08:00
|
|
|
u32 val3);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
asmlinkage long sys_init_module(void __user *umod, unsigned long len,
|
|
|
|
const char __user *uargs);
|
|
|
|
asmlinkage long sys_delete_module(const char __user *name_user,
|
|
|
|
unsigned int flags);
|
|
|
|
|
|
|
|
asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set,
|
|
|
|
sigset_t __user *oset, size_t sigsetsize);
|
|
|
|
asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize);
|
|
|
|
asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese,
|
|
|
|
siginfo_t __user *uinfo,
|
|
|
|
const struct timespec __user *uts,
|
|
|
|
size_t sigsetsize);
|
|
|
|
asmlinkage long sys_kill(int pid, int sig);
|
|
|
|
asmlinkage long sys_tgkill(int tgid, int pid, int sig);
|
|
|
|
asmlinkage long sys_tkill(int pid, int sig);
|
|
|
|
asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo);
|
|
|
|
asmlinkage long sys_sgetmask(void);
|
|
|
|
asmlinkage long sys_ssetmask(int newmask);
|
|
|
|
asmlinkage unsigned long sys_signal(int sig, __sighandler_t handler);
|
|
|
|
asmlinkage long sys_pause(void);
|
|
|
|
|
|
|
|
asmlinkage long sys_sync(void);
|
|
|
|
asmlinkage long sys_fsync(unsigned int fd);
|
|
|
|
asmlinkage long sys_fdatasync(unsigned int fd);
|
|
|
|
asmlinkage long sys_bdflush(int func, long data);
|
|
|
|
asmlinkage long sys_mount(char __user *dev_name, char __user *dir_name,
|
|
|
|
char __user *type, unsigned long flags,
|
|
|
|
void __user *data);
|
|
|
|
asmlinkage long sys_umount(char __user *name, int flags);
|
|
|
|
asmlinkage long sys_oldumount(char __user *name);
|
|
|
|
asmlinkage long sys_truncate(const char __user *path,
|
|
|
|
unsigned long length);
|
|
|
|
asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
|
|
|
|
asmlinkage long sys_stat(char __user *filename,
|
|
|
|
struct __old_kernel_stat __user *statbuf);
|
|
|
|
asmlinkage long sys_statfs(const char __user * path,
|
|
|
|
struct statfs __user *buf);
|
|
|
|
asmlinkage long sys_statfs64(const char __user *path, size_t sz,
|
|
|
|
struct statfs64 __user *buf);
|
|
|
|
asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf);
|
|
|
|
asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz,
|
|
|
|
struct statfs64 __user *buf);
|
|
|
|
asmlinkage long sys_lstat(char __user *filename,
|
|
|
|
struct __old_kernel_stat __user *statbuf);
|
|
|
|
asmlinkage long sys_fstat(unsigned int fd,
|
|
|
|
struct __old_kernel_stat __user *statbuf);
|
|
|
|
asmlinkage long sys_newstat(char __user *filename,
|
|
|
|
struct stat __user *statbuf);
|
|
|
|
asmlinkage long sys_newlstat(char __user *filename,
|
|
|
|
struct stat __user *statbuf);
|
|
|
|
asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf);
|
|
|
|
asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf);
|
|
|
|
#if BITS_PER_LONG == 32
|
|
|
|
asmlinkage long sys_stat64(char __user *filename,
|
|
|
|
struct stat64 __user *statbuf);
|
|
|
|
asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf);
|
|
|
|
asmlinkage long sys_lstat64(char __user *filename,
|
|
|
|
struct stat64 __user *statbuf);
|
|
|
|
asmlinkage long sys_truncate64(const char __user *path, loff_t length);
|
|
|
|
asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length);
|
|
|
|
#endif
|
|
|
|
|
2008-04-29 15:59:41 +08:00
|
|
|
asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
|
|
|
|
const void __user *value, size_t size, int flags);
|
|
|
|
asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name,
|
|
|
|
const void __user *value, size_t size, int flags);
|
|
|
|
asmlinkage long sys_fsetxattr(int fd, const char __user *name,
|
|
|
|
const void __user *value, size_t size, int flags);
|
|
|
|
asmlinkage ssize_t sys_getxattr(const char __user *path, const char __user *name,
|
2005-04-17 06:20:36 +08:00
|
|
|
void __user *value, size_t size);
|
2008-04-29 15:59:41 +08:00
|
|
|
asmlinkage ssize_t sys_lgetxattr(const char __user *path, const char __user *name,
|
2005-04-17 06:20:36 +08:00
|
|
|
void __user *value, size_t size);
|
2008-04-29 15:59:41 +08:00
|
|
|
asmlinkage ssize_t sys_fgetxattr(int fd, const char __user *name,
|
2005-04-17 06:20:36 +08:00
|
|
|
void __user *value, size_t size);
|
2008-04-29 15:59:41 +08:00
|
|
|
asmlinkage ssize_t sys_listxattr(const char __user *path, char __user *list,
|
2005-04-17 06:20:36 +08:00
|
|
|
size_t size);
|
2008-04-29 15:59:41 +08:00
|
|
|
asmlinkage ssize_t sys_llistxattr(const char __user *path, char __user *list,
|
2005-04-17 06:20:36 +08:00
|
|
|
size_t size);
|
|
|
|
asmlinkage ssize_t sys_flistxattr(int fd, char __user *list, size_t size);
|
2008-04-29 15:59:41 +08:00
|
|
|
asmlinkage long sys_removexattr(const char __user *path,
|
|
|
|
const char __user *name);
|
|
|
|
asmlinkage long sys_lremovexattr(const char __user *path,
|
|
|
|
const char __user *name);
|
|
|
|
asmlinkage long sys_fremovexattr(int fd, const char __user *name);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
asmlinkage unsigned long sys_brk(unsigned long brk);
|
|
|
|
asmlinkage long sys_mprotect(unsigned long start, size_t len,
|
|
|
|
unsigned long prot);
|
|
|
|
asmlinkage unsigned long sys_mremap(unsigned long addr,
|
|
|
|
unsigned long old_len, unsigned long new_len,
|
|
|
|
unsigned long flags, unsigned long new_addr);
|
|
|
|
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
|
|
|
|
unsigned long prot, unsigned long pgoff,
|
|
|
|
unsigned long flags);
|
|
|
|
asmlinkage long sys_msync(unsigned long start, size_t len, int flags);
|
|
|
|
asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice);
|
|
|
|
asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice);
|
|
|
|
asmlinkage long sys_munmap(unsigned long addr, size_t len);
|
|
|
|
asmlinkage long sys_mlock(unsigned long start, size_t len);
|
|
|
|
asmlinkage long sys_munlock(unsigned long start, size_t len);
|
|
|
|
asmlinkage long sys_mlockall(int flags);
|
|
|
|
asmlinkage long sys_munlockall(void);
|
|
|
|
asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
|
|
|
|
asmlinkage long sys_mincore(unsigned long start, size_t len,
|
|
|
|
unsigned char __user * vec);
|
|
|
|
|
|
|
|
asmlinkage long sys_pivot_root(const char __user *new_root,
|
|
|
|
const char __user *put_old);
|
|
|
|
asmlinkage long sys_chroot(const char __user *filename);
|
|
|
|
asmlinkage long sys_mknod(const char __user *filename, int mode,
|
|
|
|
unsigned dev);
|
|
|
|
asmlinkage long sys_link(const char __user *oldname,
|
|
|
|
const char __user *newname);
|
|
|
|
asmlinkage long sys_symlink(const char __user *old, const char __user *new);
|
|
|
|
asmlinkage long sys_unlink(const char __user *pathname);
|
|
|
|
asmlinkage long sys_rename(const char __user *oldname,
|
|
|
|
const char __user *newname);
|
|
|
|
asmlinkage long sys_chmod(const char __user *filename, mode_t mode);
|
|
|
|
asmlinkage long sys_fchmod(unsigned int fd, mode_t mode);
|
|
|
|
|
|
|
|
asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg);
|
|
|
|
#if BITS_PER_LONG == 32
|
|
|
|
asmlinkage long sys_fcntl64(unsigned int fd,
|
|
|
|
unsigned int cmd, unsigned long arg);
|
|
|
|
#endif
|
|
|
|
asmlinkage long sys_dup(unsigned int fildes);
|
|
|
|
asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd);
|
|
|
|
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
|
|
|
|
asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd,
|
|
|
|
unsigned long arg);
|
|
|
|
asmlinkage long sys_flock(unsigned int fd, unsigned int cmd);
|
|
|
|
asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx);
|
|
|
|
asmlinkage long sys_io_destroy(aio_context_t ctx);
|
|
|
|
asmlinkage long sys_io_getevents(aio_context_t ctx_id,
|
|
|
|
long min_nr,
|
|
|
|
long nr,
|
|
|
|
struct io_event __user *events,
|
|
|
|
struct timespec __user *timeout);
|
|
|
|
asmlinkage long sys_io_submit(aio_context_t, long,
|
|
|
|
struct iocb __user * __user *);
|
|
|
|
asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
|
|
|
|
struct io_event __user *result);
|
|
|
|
asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd,
|
|
|
|
off_t __user *offset, size_t count);
|
|
|
|
asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd,
|
|
|
|
loff_t __user *offset, size_t count);
|
|
|
|
asmlinkage long sys_readlink(const char __user *path,
|
|
|
|
char __user *buf, int bufsiz);
|
|
|
|
asmlinkage long sys_creat(const char __user *pathname, int mode);
|
|
|
|
asmlinkage long sys_open(const char __user *filename,
|
|
|
|
int flags, int mode);
|
|
|
|
asmlinkage long sys_close(unsigned int fd);
|
|
|
|
asmlinkage long sys_access(const char __user *filename, int mode);
|
|
|
|
asmlinkage long sys_vhangup(void);
|
|
|
|
asmlinkage long sys_chown(const char __user *filename,
|
|
|
|
uid_t user, gid_t group);
|
|
|
|
asmlinkage long sys_lchown(const char __user *filename,
|
|
|
|
uid_t user, gid_t group);
|
|
|
|
asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
|
|
|
|
#ifdef CONFIG_UID16
|
|
|
|
asmlinkage long sys_chown16(const char __user *filename,
|
|
|
|
old_uid_t user, old_gid_t group);
|
|
|
|
asmlinkage long sys_lchown16(const char __user *filename,
|
|
|
|
old_uid_t user, old_gid_t group);
|
|
|
|
asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group);
|
|
|
|
asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid);
|
|
|
|
asmlinkage long sys_setgid16(old_gid_t gid);
|
|
|
|
asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid);
|
|
|
|
asmlinkage long sys_setuid16(old_uid_t uid);
|
|
|
|
asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid);
|
|
|
|
asmlinkage long sys_getresuid16(old_uid_t __user *ruid,
|
|
|
|
old_uid_t __user *euid, old_uid_t __user *suid);
|
|
|
|
asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid);
|
|
|
|
asmlinkage long sys_getresgid16(old_gid_t __user *rgid,
|
|
|
|
old_gid_t __user *egid, old_gid_t __user *sgid);
|
|
|
|
asmlinkage long sys_setfsuid16(old_uid_t uid);
|
|
|
|
asmlinkage long sys_setfsgid16(old_gid_t gid);
|
|
|
|
asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist);
|
|
|
|
asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist);
|
|
|
|
asmlinkage long sys_getuid16(void);
|
|
|
|
asmlinkage long sys_geteuid16(void);
|
|
|
|
asmlinkage long sys_getgid16(void);
|
|
|
|
asmlinkage long sys_getegid16(void);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
asmlinkage long sys_utime(char __user *filename,
|
|
|
|
struct utimbuf __user *times);
|
|
|
|
asmlinkage long sys_utimes(char __user *filename,
|
|
|
|
struct timeval __user *utimes);
|
|
|
|
asmlinkage off_t sys_lseek(unsigned int fd, off_t offset,
|
|
|
|
unsigned int origin);
|
|
|
|
asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
|
|
|
|
unsigned long offset_low, loff_t __user *result,
|
|
|
|
unsigned int origin);
|
|
|
|
asmlinkage ssize_t sys_read(unsigned int fd, char __user *buf,
|
|
|
|
size_t count);
|
|
|
|
asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count);
|
|
|
|
asmlinkage ssize_t sys_readv(unsigned long fd,
|
|
|
|
const struct iovec __user *vec,
|
|
|
|
unsigned long vlen);
|
|
|
|
asmlinkage ssize_t sys_write(unsigned int fd, const char __user *buf,
|
|
|
|
size_t count);
|
|
|
|
asmlinkage ssize_t sys_writev(unsigned long fd,
|
|
|
|
const struct iovec __user *vec,
|
|
|
|
unsigned long vlen);
|
|
|
|
asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf,
|
|
|
|
size_t count, loff_t pos);
|
|
|
|
asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf,
|
|
|
|
size_t count, loff_t pos);
|
|
|
|
asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
|
|
|
|
asmlinkage long sys_mkdir(const char __user *pathname, int mode);
|
|
|
|
asmlinkage long sys_chdir(const char __user *filename);
|
|
|
|
asmlinkage long sys_fchdir(unsigned int fd);
|
|
|
|
asmlinkage long sys_rmdir(const char __user *pathname);
|
|
|
|
asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user *buf, size_t len);
|
|
|
|
asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special,
|
|
|
|
qid_t id, void __user *addr);
|
|
|
|
asmlinkage long sys_getdents(unsigned int fd,
|
|
|
|
struct linux_dirent __user *dirent,
|
|
|
|
unsigned int count);
|
|
|
|
asmlinkage long sys_getdents64(unsigned int fd,
|
|
|
|
struct linux_dirent64 __user *dirent,
|
|
|
|
unsigned int count);
|
|
|
|
|
|
|
|
asmlinkage long sys_setsockopt(int fd, int level, int optname,
|
|
|
|
char __user *optval, int optlen);
|
|
|
|
asmlinkage long sys_getsockopt(int fd, int level, int optname,
|
|
|
|
char __user *optval, int __user *optlen);
|
|
|
|
asmlinkage long sys_bind(int, struct sockaddr __user *, int);
|
|
|
|
asmlinkage long sys_connect(int, struct sockaddr __user *, int);
|
|
|
|
asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *);
|
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
/*
 * Client thread of the paccept test.  ARG carries the pthread_t of the
 * main thread.  Each pthread_barrier_wait() below pairs, in order, with
 * one of the four barrier waits in main().
 */
static void *
tf (void *arg)
{
/* Barrier 1: main is about to call paccept with flags == 0.  */
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
/* First connection: gives the first paccept call something to accept.  */
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
/* Barrier 2: main is about to call paccept with SOCK_CLOEXEC.  */
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
/* Second connection, for the SOCK_CLOEXEC paccept call.  */
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
/* Barriers 3 and 4: main installs the SIGUSR1 handler and blocks the
   signal between these two, then calls paccept with a signal mask.  */
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
/* No connection is made this time; after a delay, signal the main
   thread, which is expected to be waiting in paccept.  */
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
/*
 * SIGUSR1 handler installed by main().  Intentionally empty: main() only
 * checks that the paccept call fails with EINTR when the signal arrives.
 */
static void
handler (int s)
{
}
/*
 * Test driver for paccept.  Runs three checks against a listening
 * loopback socket, synchronised with the client thread tf() through the
 * two-party barrier B:
 *   1. paccept with flags == 0 must not set FD_CLOEXEC;
 *   2. paccept with SOCK_CLOEXEC must set FD_CLOEXEC;
 *   3. paccept with a signal mask must fail with EINTR when SIGUSR1
 *      is delivered.
 * Returns 0 and prints "OK" on success, 1 with a message on failure.
 */
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
/* Pass our own thread id so tf() can later send SIGUSR1 to this thread.  */
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
/* Set up the listening socket on the loopback address.  */
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
/* Check 1: no flags, no signal mask.  */
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
/* Check 2: SOCK_CLOEXEC must show up as FD_CLOEXEC on the new fd.  */
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
/* Check 3: signal mask handling.  */
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
/* Block SIGUSR1 in this thread ...  */
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
/* ... and build the mask handed to paccept, in which it is unblocked.  */
sigdelset (&ss, SIGUSR1);
/* Presumably a safety timeout so a broken kernel cannot hang the test.  */
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
/* tf() makes no connection now; only the SIGUSR1 it sends can end this call.  */
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:29:20 +08:00
|
|
|
/*
 * accept() variant that additionally takes a signal mask and a flags
 * value (e.g. SOCK_CLOEXEC).  The mask is a user-space pointer, so it is
 * annotated __user like the sigmask argument of sys_epoll_pwait;
 * sigsetsize is the size of the object sigmask points to.
 */
asmlinkage long sys_paccept(int fd, struct sockaddr __user *addr,
				int __user *addrlen,
				const sigset_t __user *sigmask,
				size_t sigsetsize, int flags);
|
2005-04-17 06:20:36 +08:00
|
|
|
asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *);
|
|
|
|
asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *);
|
|
|
|
asmlinkage long sys_send(int, void __user *, size_t, unsigned);
|
|
|
|
asmlinkage long sys_sendto(int, void __user *, size_t, unsigned,
|
|
|
|
struct sockaddr __user *, int);
|
|
|
|
asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags);
|
|
|
|
asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
|
|
|
|
asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
|
|
|
|
struct sockaddr __user *, int __user *);
|
|
|
|
asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags);
|
|
|
|
asmlinkage long sys_socket(int, int, int);
|
|
|
|
asmlinkage long sys_socketpair(int, int, int, int __user *);
|
|
|
|
asmlinkage long sys_socketcall(int call, unsigned long __user *args);
|
|
|
|
asmlinkage long sys_listen(int, int);
|
|
|
|
asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
|
|
|
|
long timeout);
|
|
|
|
asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
|
|
|
|
fd_set __user *exp, struct timeval __user *tvp);
|
|
|
|
asmlinkage long sys_epoll_create(int size);
|
|
|
|
asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
|
|
|
|
struct epoll_event __user *event);
|
|
|
|
asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
|
|
|
|
int maxevents, int timeout);
|
2006-10-11 16:21:44 +08:00
|
|
|
asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
|
|
|
|
int maxevents, int timeout,
|
|
|
|
const sigset_t __user *sigmask,
|
|
|
|
size_t sigsetsize);
|
2005-04-17 06:20:36 +08:00
|
|
|
asmlinkage long sys_gethostname(char __user *name, int len);
|
|
|
|
asmlinkage long sys_sethostname(char __user *name, int len);
|
|
|
|
asmlinkage long sys_setdomainname(char __user *name, int len);
|
|
|
|
asmlinkage long sys_newuname(struct new_utsname __user *name);
|
|
|
|
|
|
|
|
asmlinkage long sys_getrlimit(unsigned int resource,
|
|
|
|
struct rlimit __user *rlim);
|
2008-07-24 12:28:50 +08:00
|
|
|
#if defined(COMPAT_RLIM_OLD_INFINITY) || !(defined(CONFIG_IA64))
|
2005-04-17 06:20:36 +08:00
|
|
|
asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim);
|
|
|
|
#endif
|
|
|
|
asmlinkage long sys_setrlimit(unsigned int resource,
|
|
|
|
struct rlimit __user *rlim);
|
|
|
|
asmlinkage long sys_getrusage(int who, struct rusage __user *ru);
|
|
|
|
asmlinkage long sys_umask(int mask);
|
|
|
|
|
|
|
|
asmlinkage long sys_msgget(key_t key, int msgflg);
|
|
|
|
asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp,
|
|
|
|
size_t msgsz, int msgflg);
|
|
|
|
asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp,
|
|
|
|
size_t msgsz, long msgtyp, int msgflg);
|
|
|
|
asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf);
|
|
|
|
|
|
|
|
asmlinkage long sys_semget(key_t key, int nsems, int semflg);
|
|
|
|
asmlinkage long sys_semop(int semid, struct sembuf __user *sops,
|
|
|
|
unsigned nsops);
|
|
|
|
asmlinkage long sys_semctl(int semid, int semnum, int cmd, union semun arg);
|
|
|
|
asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops,
|
|
|
|
unsigned nsops,
|
|
|
|
const struct timespec __user *timeout);
|
2005-05-01 23:59:12 +08:00
|
|
|
asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg);
|
2005-04-17 06:20:36 +08:00
|
|
|
asmlinkage long sys_shmget(key_t key, size_t size, int flag);
|
|
|
|
asmlinkage long sys_shmdt(char __user *shmaddr);
|
|
|
|
asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
|
|
|
|
|
|
|
|
asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr);
|
|
|
|
asmlinkage long sys_mq_unlink(const char __user *name);
|
|
|
|
asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout);
|
|
|
|
asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout);
|
|
|
|
asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification);
|
|
|
|
asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat);
|
|
|
|
|
|
|
|
asmlinkage long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn);
|
|
|
|
asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn,
|
|
|
|
unsigned long off, unsigned long len,
|
|
|
|
void __user *buf);
|
|
|
|
asmlinkage long sys_pciconfig_write(unsigned long bus, unsigned long dfn,
|
|
|
|
unsigned long off, unsigned long len,
|
|
|
|
void __user *buf);
|
|
|
|
|
|
|
|
asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
|
|
|
|
unsigned long arg4, unsigned long arg5);
|
|
|
|
asmlinkage long sys_swapon(const char __user *specialfile, int swap_flags);
|
|
|
|
asmlinkage long sys_swapoff(const char __user *specialfile);
|
|
|
|
asmlinkage long sys_sysctl(struct __sysctl_args __user *args);
|
|
|
|
asmlinkage long sys_sysinfo(struct sysinfo __user *info);
|
|
|
|
asmlinkage long sys_sysfs(int option,
|
|
|
|
unsigned long arg1, unsigned long arg2);
|
|
|
|
asmlinkage long sys_nfsservctl(int cmd,
|
|
|
|
struct nfsctl_arg __user *arg,
|
|
|
|
void __user *res);
|
|
|
|
asmlinkage long sys_syslog(int type, char __user *buf, int len);
|
|
|
|
asmlinkage long sys_uselib(const char __user *library);
|
|
|
|
asmlinkage long sys_ni_syscall(void);
|
2005-10-31 07:02:22 +08:00
|
|
|
asmlinkage long sys_ptrace(long request, long pid, long addr, long data);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
asmlinkage long sys_add_key(const char __user *_type,
|
|
|
|
const char __user *_description,
|
|
|
|
const void __user *_payload,
|
|
|
|
size_t plen,
|
|
|
|
key_serial_t destringid);
|
|
|
|
|
|
|
|
asmlinkage long sys_request_key(const char __user *_type,
|
|
|
|
const char __user *_description,
|
|
|
|
const char __user *_callout_info,
|
|
|
|
key_serial_t destringid);
|
|
|
|
|
|
|
|
asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3,
|
|
|
|
unsigned long arg4, unsigned long arg5);
|
|
|
|
|
2005-07-08 08:56:13 +08:00
|
|
|
asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
|
|
|
|
asmlinkage long sys_ioprio_get(int which, int who);
|
2005-09-22 00:55:43 +08:00
|
|
|
asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
|
2006-01-19 09:43:04 +08:00
|
|
|
unsigned long maxnode);
|
2006-01-08 17:00:51 +08:00
|
|
|
asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
|
2006-01-19 09:43:04 +08:00
|
|
|
const unsigned long __user *from,
|
|
|
|
const unsigned long __user *to);
|
2006-06-23 17:03:55 +08:00
|
|
|
asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
|
|
|
|
const void __user * __user *pages,
|
|
|
|
const int __user *nodes,
|
|
|
|
int __user *status,
|
|
|
|
int flags);
|
2006-06-23 17:03:56 +08:00
|
|
|
asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page,
|
2006-06-23 17:03:57 +08:00
|
|
|
__u32 __user *pages,
|
2006-06-23 17:03:56 +08:00
|
|
|
const int __user *nodes,
|
|
|
|
int __user *status,
|
|
|
|
int flags);
|
2006-01-19 09:43:04 +08:00
|
|
|
asmlinkage long sys_mbind(unsigned long start, unsigned long len,
|
|
|
|
unsigned long mode,
|
|
|
|
unsigned long __user *nmask,
|
|
|
|
unsigned long maxnode,
|
|
|
|
unsigned flags);
|
|
|
|
asmlinkage long sys_get_mempolicy(int __user *policy,
|
|
|
|
unsigned long __user *nmask,
|
|
|
|
unsigned long maxnode,
|
|
|
|
unsigned long addr, unsigned long flags);
|
|
|
|
|
|
|
|
asmlinkage long sys_inotify_init(void);
|
|
|
|
asmlinkage long sys_inotify_add_watch(int fd, const char __user *path,
|
|
|
|
u32 mask);
|
|
|
|
asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
|
2005-07-08 08:56:13 +08:00
|
|
|
|
2005-11-16 04:53:48 +08:00
|
|
|
asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
|
|
|
|
__u32 __user *ustatus);
|
|
|
|
asmlinkage long sys_spu_create(const char __user *name,
|
2007-07-21 03:39:47 +08:00
|
|
|
unsigned int flags, mode_t mode, int fd);
|
2005-11-16 04:53:48 +08:00
|
|
|
|
2006-02-01 19:04:33 +08:00
|
|
|
asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
|
|
|
|
unsigned dev);
|
|
|
|
asmlinkage long sys_mkdirat(int dfd, const char __user * pathname, int mode);
|
|
|
|
asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag);
|
|
|
|
asmlinkage long sys_symlinkat(const char __user * oldname,
|
|
|
|
int newdfd, const char __user * newname);
|
|
|
|
asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
|
2006-02-25 05:04:21 +08:00
|
|
|
int newdfd, const char __user *newname, int flags);
|
2006-02-01 19:04:33 +08:00
|
|
|
asmlinkage long sys_renameat(int olddfd, const char __user * oldname,
|
|
|
|
int newdfd, const char __user * newname);
|
|
|
|
asmlinkage long sys_futimesat(int dfd, char __user *filename,
|
|
|
|
struct timeval __user *utimes);
|
|
|
|
asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode);
|
|
|
|
asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
|
|
|
|
mode_t mode);
|
|
|
|
asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
|
|
|
|
gid_t group, int flag);
|
|
|
|
asmlinkage long sys_openat(int dfd, const char __user *filename, int flags,
|
|
|
|
int mode);
|
|
|
|
asmlinkage long sys_newfstatat(int dfd, char __user *filename,
|
|
|
|
struct stat __user *statbuf, int flag);
|
2006-02-12 09:55:47 +08:00
|
|
|
asmlinkage long sys_fstatat64(int dfd, char __user *filename,
|
|
|
|
struct stat64 __user *statbuf, int flag);
|
2006-02-01 19:04:33 +08:00
|
|
|
asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf,
|
|
|
|
int bufsiz);
|
2007-05-09 17:32:35 +08:00
|
|
|
asmlinkage long sys_utimensat(int dfd, char __user *filename,
|
|
|
|
struct timespec __user *utimes, int flags);
|
2006-02-02 13:11:51 +08:00
|
|
|
asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename,
|
2006-02-01 19:04:33 +08:00
|
|
|
struct compat_timeval __user *t);
|
2006-02-02 13:11:51 +08:00
|
|
|
asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
|
2006-02-01 19:04:33 +08:00
|
|
|
struct compat_stat __user *statbuf,
|
|
|
|
int flag);
|
2006-02-02 13:11:51 +08:00
|
|
|
asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
|
2006-02-01 19:04:33 +08:00
|
|
|
int flags, int mode);
|
2006-03-24 19:15:08 +08:00
|
|
|
asmlinkage long sys_unshare(unsigned long unshare_flags);
|
2006-04-10 21:18:58 +08:00
|
|
|
|
|
|
|
asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
|
|
|
|
int fd_out, loff_t __user *off_out,
|
|
|
|
size_t len, unsigned int flags);
|
|
|
|
|
2006-04-26 16:59:21 +08:00
|
|
|
asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
|
|
|
|
unsigned long nr_segs, unsigned int flags);
|
|
|
|
|
2006-04-11 21:51:17 +08:00
|
|
|
asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags);
|
|
|
|
|
2006-03-31 18:30:42 +08:00
|
|
|
asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
|
2006-04-11 13:53:57 +08:00
|
|
|
unsigned int flags);
|
Introduce fixed sys_sync_file_range2() syscall, implement on PowerPC and ARM
Not all the world is an i386. Many architectures need 64-bit arguments to be
aligned in suitable pairs of registers, and the original
sys_sync_file_range(int, loff_t, loff_t, int) was therefore wasting an
argument register for padding after the first integer. Since we don't
normally have more than 6 arguments for system calls, that left no room for
the final argument on some architectures.
Fix this by introducing sys_sync_file_range2(int, int, loff_t, loff_t) which
all fits nicely. In fact, ARM already had that, but called it
sys_arm_sync_file_range. Move it to fs/sync.c and rename it, then implement
the needed compatibility routine. And stop the missing syscall check from
bitching about the absence of sys_sync_file_range() if we've implemented
sys_sync_file_range2() instead.
Tested on PPC32 and with 32-bit and 64-bit userspace on PPC64.
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-06-28 05:10:09 +08:00
|
|
|
asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
|
|
|
|
loff_t offset, loff_t nbytes);
|
2006-05-23 22:46:40 +08:00
|
|
|
asmlinkage long sys_get_robust_list(int pid,
|
2006-10-11 05:46:07 +08:00
|
|
|
struct robust_list_head __user * __user *head_ptr,
|
2006-05-23 22:46:40 +08:00
|
|
|
size_t __user *len_ptr);
|
|
|
|
asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
|
|
|
|
size_t len);
|
2006-09-29 16:58:35 +08:00
|
|
|
asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
|
signal/timer/event: signalfd core
This patch series implements the new signalfd() system call.
I took part of the original Linus code (and you know how badly it can be
broken :), and I added even more breakage ;) Signals are fetched from the same
signal queue used by the process, so signalfd will compete with standard
kernel delivery in dequeue_signal(). If you want to reliably fetch signals on
the signalfd file, you need to block them with sigprocmask(SIG_BLOCK). This
seems to be working fine on my Dual Opteron machine. I made a quick test
program for it:
http://www.xmailserver.org/signafd-test.c
The signalfd() system call implements signal delivery into a file descriptor
receiver. The signalfd file descriptor is created with the following API:
int signalfd(int ufd, const sigset_t *mask, size_t masksize);
The "ufd" parameter allows changing the sigmask of an existing signalfd without
going through a close/create cycle (Linus' idea). Use "ufd" == -1 if you want a
brand new signalfd file.
The "mask" allows to specify the signal mask of signals that we are interested
in. The "masksize" parameter is the size of "mask".
The signalfd fd supports the poll(2) and read(2) system calls. The poll(2)
will return POLLIN when signals are available to be dequeued. As a direct
consequence of supporting the Linux poll subsystem, the signalfd fd can be
used together with epoll(2) too.
The read(2) system call will return a "struct signalfd_siginfo" structure in
the userspace supplied buffer. The return value is the number of bytes copied
in the supplied buffer, or -1 in case of error. The read(2) call can also
return 0, in case the sighand structure to which the signalfd was attached,
has been orphaned. The O_NONBLOCK flag is also supported, and read(2) will
return -EAGAIN in case no signal is available.
If the size of the buffer passed to read(2) is smaller than sizeof(struct
signalfd_siginfo), -EINVAL is returned. A read from the signalfd can also
return -ERESTARTSYS in case a signal hits the process. The format of
struct signalfd_siginfo is shown below; which fields are valid depends on the
(->code & __SI_MASK) value, in the same way as for a struct siginfo:
struct signalfd_siginfo {
__u32 signo; /* si_signo */
__s32 err; /* si_errno */
__s32 code; /* si_code */
__u32 pid; /* si_pid */
__u32 uid; /* si_uid */
__s32 fd; /* si_fd */
__u32 tid; /* si_tid */
__u32 band; /* si_band */
__u32 overrun; /* si_overrun */
__u32 trapno; /* si_trapno */
__s32 status; /* si_status */
__s32 svint; /* si_int */
__u64 svptr; /* si_ptr */
__u64 utime; /* si_utime */
__u64 stime; /* si_stime */
__u64 addr; /* si_addr */
};
[akpm@linux-foundation.org: fix signalfd_copyinfo() on i386]
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-11 13:23:13 +08:00
|
|
|
asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask);
|
timerfd: new timerfd API
This is the new timerfd API as it is implemented by the following patch:
int timerfd_create(int clockid, int flags);
int timerfd_settime(int ufd, int flags,
const struct itimerspec *utmr,
struct itimerspec *otmr);
int timerfd_gettime(int ufd, struct itimerspec *otmr);
The timerfd_create() API creates an un-programmed timerfd fd. The "clockid"
parameter can be either CLOCK_MONOTONIC or CLOCK_REALTIME.
The timerfd_settime() API applies new settings to the timerfd fd, optionally
retrieving the previous expiration time (in case the "otmr" parameter is not
NULL).
The time value specified in "utmr" is absolute, if the TFD_TIMER_ABSTIME bit
is set in the "flags" parameter. Otherwise it's a relative time.
The timerfd_gettime() API returns the next expiration time of the timer, or
{0, 0} if the timerfd has not been set yet.
Like the previous timerfd API implementation, read(2) and poll(2) are
supported (with the same interface). Here's a simple test program I used to
exercise the new timerfd APIs:
http://www.xmailserver.org/timerfd-test2.c
[akpm@linux-foundation.org: coding-style cleanups]
[akpm@linux-foundation.org: fix ia64 build]
[akpm@linux-foundation.org: fix m68k build]
[akpm@linux-foundation.org: fix mips build]
[akpm@linux-foundation.org: fix alpha, arm, blackfin, cris, m68k, s390, sparc and sparc64 builds]
[heiko.carstens@de.ibm.com: fix s390]
[akpm@linux-foundation.org: fix powerpc build]
[akpm@linux-foundation.org: fix sparc64 more]
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 14:27:26 +08:00
|
|
|
asmlinkage long sys_timerfd_create(int clockid, int flags);
|
|
|
|
asmlinkage long sys_timerfd_settime(int ufd, int flags,
|
|
|
|
const struct itimerspec __user *utmr,
|
|
|
|
struct itimerspec __user *otmr);
|
|
|
|
asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
|
signal/timer/event: eventfd core
This is a very simple and light file descriptor, that can be used as event
wait/dispatch by userspace (both wait and dispatch) and by the kernel
(dispatch only). It can be used instead of pipe(2) in all cases where those
would simply be used to signal events. Their kernel overhead is much lower
than pipes, and they do not consume two fds. When used in the kernel, it can
offer an fd-bridge to enable, for example, functionalities like KAIO or
syslets/threadlets to signal to an fd the completion of certain operations.
But more in general, an eventfd can be used by the kernel to signal readiness,
in a POSIX poll/select way, of interfaces that would otherwise be incompatible
with it. The API is:
int eventfd(unsigned int count);
The eventfd API accepts an initial "count" parameter, and returns an eventfd
fd. It supports poll(2) (POLLIN, POLLOUT, POLLERR), read(2) and write(2).
The POLLIN flag is raised when the internal counter is greater than zero.
The POLLOUT flag is raised when at least a value of "1" can be written to the
internal counter.
The POLLERR flag is raised when an overflow in the counter value is detected.
The write(2) operation can never overflow the counter, since it blocks (unless
O_NONBLOCK is set, in which case -EAGAIN is returned).
But the eventfd_signal() function can do it, since it's supposed to not sleep
during its operation.
The read(2) function reads the __u64 counter value, and resets the internal
value to zero. If the value read is equal to (__u64) -1, an overflow happened
on the internal counter (due to 2^64 eventfd_signal() posts that have never
been retired - unlikely, but possible).
The write(2) call writes an __u64 count value, and adds it to the current
counter. The eventfd fd supports O_NONBLOCK also.
On the kernel side, we have:
struct file *eventfd_fget(int fd);
int eventfd_signal(struct file *file, unsigned int n);
The eventfd_fget() should be called to get a struct file* from an eventfd fd
(this is an fget() + check of f_op being an eventfd fops pointer).
The kernel can then call eventfd_signal() every time it wants to post an event
to userspace. The eventfd_signal() function can be called from any context.
An eventfd() simple test and bench is available here:
http://www.xmailserver.org/eventfd-bench.c
This is the eventfd-based version of pipetest-4 (pipe(2) based):
http://www.xmailserver.org/pipetest-4.c
Not that performance matters much in the eventfd case, but eventfd-bench
shows almost double the performance of pipetest-4.
[akpm@linux-foundation.org: fix i386 build]
[akpm@linux-foundation.org: add sys_eventfd to sys_ni.c]
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-11 13:23:19 +08:00
|
|
|
/*
 * eventfd syscall entry point.
 * @count: initial value of the eventfd's internal counter.
 * Per the introducing change, the call returns an eventfd file descriptor;
 * the error return convention is not visible from this header.
 */
asmlinkage long sys_eventfd(unsigned int count);
|
sys_fallocate() implementation on i386, x86_64 and powerpc
fallocate() is a new system call being proposed here which will allow
applications to preallocate space to any file(s) in a file system.
Each file system implementation that wants to use this feature will need
to support an inode operation called ->fallocate().
Applications can use this feature to avoid fragmentation to a certain
level and thus get faster access speed. With preallocation, applications
also get a guarantee of space for particular file(s) - even if later
the system becomes full.
Currently, glibc provides an interface called posix_fallocate() which
can be used for a similar purpose. Though this has the advantage of working
on all file systems, it is quite slow (since it writes zeroes to
each block that has to be preallocated). Without a doubt, file systems
can do this more efficiently within the kernel, by implementing
the proposed fallocate() system call. It is expected that
posix_fallocate() will be modified to call this new system call first
and in case the kernel/filesystem does not implement it, it should fall
back to the current implementation of writing zeroes to the new blocks.
ToDos:
1. Implementation on other architectures (other than i386, x86_64,
and ppc). Patches for s390(x) and ia64 are already available from
previous posts, but it was decided that they should be added later
once fallocate is in the mainline. Hence not including those patches
in this take.
2. Changes to glibc,
a) to support fallocate() system call
b) to make posix_fallocate() and posix_fallocate64() call fallocate()
Signed-off-by: Amit Arora <aarora@in.ibm.com>
2007-07-18 09:42:44 +08:00
|
|
|
/*
 * fallocate syscall entry point: lets an application preallocate space for
 * a file; file systems support it through an inode ->fallocate() operation.
 * @fd:     descriptor of the target file.
 * @mode:   operation mode -- accepted values are not visible from this
 *          header; TODO confirm against the implementation.
 * @offset: presumably the starting byte offset of the range -- confirm.
 * @len:    presumably the length in bytes of the range -- confirm.
 */
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
|
2006-02-01 19:04:33 +08:00
|
|
|
|
2006-10-02 17:18:31 +08:00
|
|
|
/*
 * Declared without asmlinkage and without __user annotations, unlike the
 * sys_* prototypes in this header.
 * NOTE(review): presumably an execve helper for in-kernel callers taking
 * kernel-space @filename, @argv and @envp -- confirm against the definition.
 */
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif /* _LINUX_SYSCALLS_H */
|