linux_old1/include/linux/compat.h

570 lines
20 KiB
C
Raw Normal View History

#ifndef _LINUX_COMPAT_H
#define _LINUX_COMPAT_H
/*
* These are the type definitions for the architecture specific
* syscall compatibility layer.
*/
#ifdef CONFIG_COMPAT
#include <linux/stat.h>
#include <linux/param.h> /* for HZ */
#include <linux/sem.h>
#include <linux/socket.h>
#include <linux/if.h>
#include <linux/fs.h>
#include <linux/aio_abi.h> /* for aio_context_t */
#include <asm/compat.h>
#include <asm/siginfo.h>
#include <asm/signal.h>
#define compat_jiffies_to_clock_t(x) \
(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
typedef __compat_uid32_t compat_uid_t;
typedef __compat_gid32_t compat_gid_t;
struct compat_sel_arg_struct;
struct rusage;
struct compat_itimerspec {
struct compat_timespec it_interval;
struct compat_timespec it_value;
};
struct compat_utimbuf {
compat_time_t actime;
compat_time_t modtime;
};
struct compat_itimerval {
struct compat_timeval it_interval;
struct compat_timeval it_value;
};
struct compat_tms {
compat_clock_t tms_utime;
compat_clock_t tms_stime;
compat_clock_t tms_cutime;
compat_clock_t tms_cstime;
};
struct compat_timex {
compat_uint_t modes;
compat_long_t offset;
compat_long_t freq;
compat_long_t maxerror;
compat_long_t esterror;
compat_int_t status;
compat_long_t constant;
compat_long_t precision;
compat_long_t tolerance;
struct compat_timeval time;
compat_long_t tick;
compat_long_t ppsfreq;
compat_long_t jitter;
compat_int_t shift;
compat_long_t stabil;
compat_long_t jitcnt;
compat_long_t calcnt;
compat_long_t errcnt;
compat_long_t stbcnt;
compat_int_t tai;
compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
compat_int_t:32; compat_int_t:32; compat_int_t:32;
};
#define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW)
typedef struct {
compat_sigset_word sig[_COMPAT_NSIG_WORDS];
} compat_sigset_t;
extern int get_compat_timespec(struct timespec *,
const struct compat_timespec __user *);
extern int put_compat_timespec(const struct timespec *,
struct compat_timespec __user *);
struct compat_iovec {
compat_uptr_t iov_base;
compat_size_t iov_len;
};
struct compat_rlimit {
compat_ulong_t rlim_cur;
compat_ulong_t rlim_max;
};
struct compat_rusage {
struct compat_timeval ru_utime;
struct compat_timeval ru_stime;
compat_long_t ru_maxrss;
compat_long_t ru_ixrss;
compat_long_t ru_idrss;
compat_long_t ru_isrss;
compat_long_t ru_minflt;
compat_long_t ru_majflt;
compat_long_t ru_nswap;
compat_long_t ru_inblock;
compat_long_t ru_oublock;
compat_long_t ru_msgsnd;
compat_long_t ru_msgrcv;
compat_long_t ru_nsignals;
compat_long_t ru_nvcsw;
compat_long_t ru_nivcsw;
};
extern int put_compat_rusage(const struct rusage *,
struct compat_rusage __user *);
struct compat_siginfo;
extern asmlinkage long compat_sys_waitid(int, compat_pid_t,
struct compat_siginfo __user *, int,
struct compat_rusage __user *);
struct compat_dirent {
u32 d_ino;
compat_off_t d_off;
u16 d_reclen;
char d_name[256];
};
struct compat_ustat {
compat_daddr_t f_tfree;
compat_ino_t f_tinode;
char f_fname[6];
char f_fpack[6];
};
typedef union compat_sigval {
compat_int_t sival_int;
compat_uptr_t sival_ptr;
} compat_sigval_t;
#define COMPAT_SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3)
typedef struct compat_sigevent {
compat_sigval_t sigev_value;
compat_int_t sigev_signo;
compat_int_t sigev_notify;
union {
compat_int_t _pad[COMPAT_SIGEV_PAD_SIZE];
compat_int_t _tid;
struct {
compat_uptr_t _function;
compat_uptr_t _attribute;
} _sigev_thread;
} _sigev_un;
} compat_sigevent_t;
struct compat_ifmap {
compat_ulong_t mem_start;
compat_ulong_t mem_end;
unsigned short base_addr;
unsigned char irq;
unsigned char dma;
unsigned char port;
};
struct compat_if_settings {
unsigned int type; /* Type of physical device or protocol */
unsigned int size; /* Size of the data allocated by the caller */
compat_uptr_t ifs_ifsu; /* union of pointers */
};
struct compat_ifreq {
union {
char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
} ifr_ifrn;
union {
struct sockaddr ifru_addr;
struct sockaddr ifru_dstaddr;
struct sockaddr ifru_broadaddr;
struct sockaddr ifru_netmask;
struct sockaddr ifru_hwaddr;
short ifru_flags;
compat_int_t ifru_ivalue;
compat_int_t ifru_mtu;
struct compat_ifmap ifru_map;
char ifru_slave[IFNAMSIZ]; /* Just fits the size */
char ifru_newname[IFNAMSIZ];
compat_caddr_t ifru_data;
struct compat_if_settings ifru_settings;
} ifr_ifru;
};
struct compat_ifconf {
compat_int_t ifc_len; /* size of buffer */
compat_caddr_t ifcbuf;
};
struct compat_robust_list {
compat_uptr_t next;
};
struct compat_robust_list_head {
struct compat_robust_list list;
compat_long_t futex_offset;
compat_uptr_t list_op_pending;
};
struct compat_statfs;
struct compat_statfs64;
struct compat_old_linux_dirent;
struct compat_linux_dirent;
struct linux_dirent64;
struct compat_msghdr;
struct compat_mmsghdr;
struct compat_sysinfo;
struct compat_sysctl_args;
struct compat_kexec_segment;
struct compat_mq_attr;
extern void compat_exit_robust_list(struct task_struct *curr);
asmlinkage long
compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
compat_size_t len);
asmlinkage long
compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
compat_size_t __user *len_ptr);
long compat_sys_semctl(int first, int second, int third, void __user *uptr);
long compat_sys_msgsnd(int first, int second, int third, void __user *uptr);
long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
int version, void __user *uptr);
long compat_sys_msgctl(int first, int second, void __user *uptr);
long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
void __user *uptr);
long compat_sys_shmctl(int first, int second, void __user *uptr);
long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
unsigned nsems, const struct compat_timespec __user *timeout);
asmlinkage long compat_sys_keyctl(u32 option,
u32 arg2, u32 arg3, u32 arg4, u32 arg5);
asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32);
asmlinkage ssize_t compat_sys_readv(unsigned long fd,
const struct compat_iovec __user *vec, unsigned long vlen);
asmlinkage ssize_t compat_sys_writev(unsigned long fd,
const struct compat_iovec __user *vec, unsigned long vlen);
preadv/pwritev: Add preadv and pwritev system calls. This patch adds preadv and pwritev system calls. These syscalls are a pretty straightforward combination of pread and readv (same for write). They are quite useful for doing vectored I/O in threaded applications. Using lseek+readv instead opens race windows you'll have to plug with locking. Other systems have such system calls too, for example NetBSD, check here: http://www.daemon-systems.org/man/preadv.2.html The application-visible interface provided by glibc should look like this to be compatible to the existing implementations in the *BSD family: ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset); ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset); This prototype has one problem though: On 32bit archs is the (64bit) offset argument unaligned, which the syscall ABI of several archs doesn't allow to do. At least s390 needs a wrapper in glibc to handle this. As we'll need a wrappers in glibc anyway I've decided to push problem to glibc entriely and use a syscall prototype which works without arch-specific wrappers inside the kernel: The offset argument is explicitly splitted into two 32bit values. The patch sports the actual system call implementation and the windup in the x86 system call tables. Other archs follow as separate patches. Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: <linux-api@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-03 07:59:23 +08:00
asmlinkage ssize_t compat_sys_preadv(unsigned long fd,
const struct compat_iovec __user *vec,
Make non-compat preadv/pwritev use native register size Instead of always splitting the file offset into 32-bit 'high' and 'low' parts, just split them into the largest natural word-size - which in C terms is 'unsigned long'. This allows 64-bit architectures to avoid the unnecessary 32-bit shifting and masking for native format (while the compat interfaces will obviously always have to do it). This also changes the order of 'high' and 'low' to be "low first". Why? Because when we have it like this, the 64-bit system calls now don't use the "pos_high" argument at all, and it makes more sense for the native system call to simply match the user-mode prototype. This results in a much more natural calling convention, and allows the compiler to generate much more straightforward code. On x86-64, we now generate testq %rcx, %rcx # pos_l js .L122 #, movq %rcx, -48(%rbp) # pos_l, pos from the C source loff_t pos = pos_from_hilo(pos_h, pos_l); ... if (pos < 0) return -EINVAL; and the 'pos_h' register isn't even touched. It used to generate code like mov %r8d, %r8d # pos_low, pos_low salq $32, %rcx #, tmp71 movq %r8, %rax # pos_low, pos.386 orq %rcx, %rax # tmp71, pos.386 js .L122 #, movq %rax, -48(%rbp) # pos.386, pos which isn't _that_ horrible, but it does show how the natural word size is just a more sensible interface (same arguments will hold in the user level glibc wrapper function, of course, so the kernel side is just half of the equation!) Note: in all cases the user code wrapper can again be the same. You can just do #define HALF_BITS (sizeof(unsigned long)*4) __syscall(PWRITEV, fd, iov, count, offset, (offset >> HALF_BITS) >> HALF_BITS); or something like that. That way the user mode wrapper will also be nicely passing in a zero (it won't actually have to do the shifts, the compiler will understand what is going on) for the last argument. And that is a good idea, even if nobody will necessarily ever care: if we ever do move to a 128-bit lloff_t, this particular system call might be left alone. Of course, that will be the least of our worries if we really ever need to care, so this may not be worth really caring about. [ Fixed for lost 'loff_t' cast noticed by Andrew Morton ] Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: Ingo Molnar <mingo@elte.hu> Cc: Ralf Baechle <ralf@linux-mips.org>> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-03 23:03:22 +08:00
unsigned long vlen, u32 pos_low, u32 pos_high);
preadv/pwritev: Add preadv and pwritev system calls. This patch adds preadv and pwritev system calls. These syscalls are a pretty straightforward combination of pread and readv (same for write). They are quite useful for doing vectored I/O in threaded applications. Using lseek+readv instead opens race windows you'll have to plug with locking. Other systems have such system calls too, for example NetBSD, check here: http://www.daemon-systems.org/man/preadv.2.html The application-visible interface provided by glibc should look like this to be compatible to the existing implementations in the *BSD family: ssize_t preadv(int d, const struct iovec *iov, int iovcnt, off_t offset); ssize_t pwritev(int d, const struct iovec *iov, int iovcnt, off_t offset); This prototype has one problem though: On 32bit archs is the (64bit) offset argument unaligned, which the syscall ABI of several archs doesn't allow to do. At least s390 needs a wrapper in glibc to handle this. As we'll need a wrappers in glibc anyway I've decided to push problem to glibc entriely and use a syscall prototype which works without arch-specific wrappers inside the kernel: The offset argument is explicitly splitted into two 32bit values. The patch sports the actual system call implementation and the windup in the x86 system call tables. Other archs follow as separate patches. Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: <linux-api@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-03 07:59:23 +08:00
asmlinkage ssize_t compat_sys_pwritev(unsigned long fd,
const struct compat_iovec __user *vec,
Make non-compat preadv/pwritev use native register size Instead of always splitting the file offset into 32-bit 'high' and 'low' parts, just split them into the largest natural word-size - which in C terms is 'unsigned long'. This allows 64-bit architectures to avoid the unnecessary 32-bit shifting and masking for native format (while the compat interfaces will obviously always have to do it). This also changes the order of 'high' and 'low' to be "low first". Why? Because when we have it like this, the 64-bit system calls now don't use the "pos_high" argument at all, and it makes more sense for the native system call to simply match the user-mode prototype. This results in a much more natural calling convention, and allows the compiler to generate much more straightforward code. On x86-64, we now generate testq %rcx, %rcx # pos_l js .L122 #, movq %rcx, -48(%rbp) # pos_l, pos from the C source loff_t pos = pos_from_hilo(pos_h, pos_l); ... if (pos < 0) return -EINVAL; and the 'pos_h' register isn't even touched. It used to generate code like mov %r8d, %r8d # pos_low, pos_low salq $32, %rcx #, tmp71 movq %r8, %rax # pos_low, pos.386 orq %rcx, %rax # tmp71, pos.386 js .L122 #, movq %rax, -48(%rbp) # pos.386, pos which isn't _that_ horrible, but it does show how the natural word size is just a more sensible interface (same arguments will hold in the user level glibc wrapper function, of course, so the kernel side is just half of the equation!) Note: in all cases the user code wrapper can again be the same. You can just do #define HALF_BITS (sizeof(unsigned long)*4) __syscall(PWRITEV, fd, iov, count, offset, (offset >> HALF_BITS) >> HALF_BITS); or something like that. That way the user mode wrapper will also be nicely passing in a zero (it won't actually have to do the shifts, the compiler will understand what is going on) for the last argument. And that is a good idea, even if nobody will necessarily ever care: if we ever do move to a 128-bit lloff_t, this particular system call might be left alone. Of course, that will be the least of our worries if we really ever need to care, so this may not be worth really caring about. [ Fixed for lost 'loff_t' cast noticed by Andrew Morton ] Acked-by: Gerd Hoffmann <kraxel@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-api@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: Ingo Molnar <mingo@elte.hu> Cc: Ralf Baechle <ralf@linux-mips.org>> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-04-03 23:03:22 +08:00
unsigned long vlen, u32 pos_low, u32 pos_high);
int compat_do_execve(char *filename, compat_uptr_t __user *argv,
compat_uptr_t __user *envp, struct pt_regs *regs);
asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
struct compat_timeval __user *tvp);
asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg);
asmlinkage long compat_sys_wait4(compat_pid_t pid,
compat_uint_t __user *stat_addr, int options,
struct compat_rusage __user *ru);
#define BITS_PER_COMPAT_LONG (8*sizeof(compat_long_t))
#define BITS_TO_COMPAT_LONGS(bits) \
(((bits)+BITS_PER_COMPAT_LONG-1)/BITS_PER_COMPAT_LONG)
long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
unsigned long bitmap_size);
long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
unsigned long bitmap_size);
int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
int get_compat_sigevent(struct sigevent *event,
const struct compat_sigevent __user *u_event);
long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
struct compat_siginfo __user *uinfo);
static inline int compat_timeval_compare(struct compat_timeval *lhs,
struct compat_timeval *rhs)
{
if (lhs->tv_sec < rhs->tv_sec)
return -1;
if (lhs->tv_sec > rhs->tv_sec)
return 1;
return lhs->tv_usec - rhs->tv_usec;
}
static inline int compat_timespec_compare(struct compat_timespec *lhs,
struct compat_timespec *rhs)
{
if (lhs->tv_sec < rhs->tv_sec)
return -1;
if (lhs->tv_sec > rhs->tv_sec)
return 1;
return lhs->tv_nsec - rhs->tv_nsec;
}
extern int get_compat_itimerspec(struct itimerspec *dst,
const struct compat_itimerspec __user *src);
extern int put_compat_itimerspec(struct compat_itimerspec __user *dst,
const struct itimerspec *src);
asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz);
asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz);
asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
extern int compat_printk(const char *fmt, ...);
extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
const compat_ulong_t __user *new_nodes);
extern int compat_ptrace_request(struct task_struct *child,
compat_long_t request,
compat_ulong_t addr, compat_ulong_t data);
extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t addr, compat_ulong_t data);
asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
compat_long_t addr, compat_long_t data);
/*
* epoll (fs/eventpoll.c) compat bits follow ...
*/
struct epoll_event;
#define compat_epoll_event epoll_event
asmlinkage long compat_sys_epoll_pwait(int epfd,
struct compat_epoll_event __user *events,
int maxevents, int timeout,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize);
asmlinkage long compat_sys_utime(const char __user *filename,
struct compat_utimbuf __user *t);
asmlinkage long compat_sys_utimensat(unsigned int dfd,
const char __user *filename,
struct compat_timespec __user *t,
int flags);
asmlinkage long compat_sys_time(compat_time_t __user *tloc);
asmlinkage long compat_sys_stime(compat_time_t __user *tptr);
asmlinkage long compat_sys_signalfd(int ufd,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize);
timerfd: new timerfd API This is the new timerfd API as it is implemented by the following patch: int timerfd_create(int clockid, int flags); int timerfd_settime(int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr); int timerfd_gettime(int ufd, struct itimerspec *otmr); The timerfd_create() API creates an un-programmed timerfd fd. The "clockid" parameter can be either CLOCK_MONOTONIC or CLOCK_REALTIME. The timerfd_settime() API give new settings by the timerfd fd, by optionally retrieving the previous expiration time (in case the "otmr" parameter is not NULL). The time value specified in "utmr" is absolute, if the TFD_TIMER_ABSTIME bit is set in the "flags" parameter. Otherwise it's a relative time. The timerfd_gettime() API returns the next expiration time of the timer, or {0, 0} if the timerfd has not been set yet. Like the previous timerfd API implementation, read(2) and poll(2) are supported (with the same interface). Here's a simple test program I used to exercise the new timerfd APIs: http://www.xmailserver.org/timerfd-test2.c [akpm@linux-foundation.org: coding-style cleanups] [akpm@linux-foundation.org: fix ia64 build] [akpm@linux-foundation.org: fix m68k build] [akpm@linux-foundation.org: fix mips build] [akpm@linux-foundation.org: fix alpha, arm, blackfin, cris, m68k, s390, sparc and sparc64 builds] [heiko.carstens@de.ibm.com: fix s390] [akpm@linux-foundation.org: fix powerpc build] [akpm@linux-foundation.org: fix sparc64 more] Signed-off-by: Davide Libenzi <davidel@xmailserver.org> Cc: Michael Kerrisk <mtk-manpages@gmx.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Davide Libenzi <davidel@xmailserver.org> Cc: Michael Kerrisk <mtk-manpages@gmx.net> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Michael Kerrisk <mtk.manpages@gmail.com> Cc: Davide Libenzi <davidel@xmailserver.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-05 14:27:26 +08:00
asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
const struct compat_itimerspec __user *utmr,
struct compat_itimerspec __user *otmr);
asmlinkage long compat_sys_timerfd_gettime(int ufd,
struct compat_itimerspec __user *otmr);
asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page,
__u32 __user *pages,
const int __user *nodes,
int __user *status,
int flags);
asmlinkage long compat_sys_futimesat(unsigned int dfd,
const char __user *filename,
struct compat_timeval __user *t);
asmlinkage long compat_sys_utimes(const char __user *filename,
struct compat_timeval __user *t);
asmlinkage long compat_sys_newstat(const char __user *filename,
struct compat_stat __user *statbuf);
asmlinkage long compat_sys_newlstat(const char __user *filename,
struct compat_stat __user *statbuf);
asmlinkage long compat_sys_newfstatat(unsigned int dfd,
const char __user *filename,
struct compat_stat __user *statbuf,
int flag);
asmlinkage long compat_sys_newfstat(unsigned int fd,
struct compat_stat __user *statbuf);
asmlinkage long compat_sys_statfs(const char __user *pathname,
struct compat_statfs __user *buf);
asmlinkage long compat_sys_fstatfs(unsigned int fd,
struct compat_statfs __user *buf);
asmlinkage long compat_sys_statfs64(const char __user *pathname,
compat_size_t sz,
struct compat_statfs64 __user *buf);
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
struct compat_statfs64 __user *buf);
asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
unsigned long arg);
asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
unsigned long arg);
asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p);
asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id,
unsigned long min_nr,
unsigned long nr,
struct io_event __user *events,
struct compat_timespec __user *timeout);
asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr,
u32 __user *iocb);
asmlinkage long compat_sys_mount(const char __user *dev_name,
const char __user *dir_name,
const char __user *type, unsigned long flags,
const void __user *data);
asmlinkage long compat_sys_old_readdir(unsigned int fd,
struct compat_old_linux_dirent __user *,
unsigned int count);
asmlinkage long compat_sys_getdents(unsigned int fd,
struct compat_linux_dirent __user *dirent,
unsigned int count);
asmlinkage long compat_sys_getdents64(unsigned int fd,
struct linux_dirent64 __user *dirent,
unsigned int count);
asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
unsigned int nr_segs, unsigned int flags);
asmlinkage long compat_sys_open(const char __user *filename, int flags,
umode_t mode);
asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
int flags, umode_t mode);
asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
struct file_handle __user *handle,
int flags);
asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp,
compat_ulong_t __user *exp,
struct compat_timespec __user *tsp,
void __user *sig);
asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
unsigned int nfds,
struct compat_timespec __user *tsp,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize);
asmlinkage long compat_sys_signalfd4(int ufd,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize, int flags);
asmlinkage long compat_sys_get_mempolicy(int __user *policy,
compat_ulong_t __user *nmask,
compat_ulong_t maxnode,
compat_ulong_t addr,
compat_ulong_t flags);
asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
compat_ulong_t maxnode);
asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
compat_ulong_t mode,
compat_ulong_t __user *nmask,
compat_ulong_t maxnode, compat_ulong_t flags);
asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
char __user *optval, unsigned int optlen);
asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg,
unsigned flags);
asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned vlen, unsigned int flags);
asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg,
unsigned int flags);
asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len,
unsigned flags);
asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len,
unsigned flags, struct sockaddr __user *addr,
int __user *addrlen);
asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
unsigned vlen, unsigned int flags,
struct compat_timespec __user *timeout);
asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
struct compat_timespec __user *rmtp);
asmlinkage long compat_sys_getitimer(int which,
struct compat_itimerval __user *it);
asmlinkage long compat_sys_setitimer(int which,
struct compat_itimerval __user *in,
struct compat_itimerval __user *out);
asmlinkage long compat_sys_times(struct compat_tms __user *tbuf);
asmlinkage long compat_sys_setrlimit(unsigned int resource,
struct compat_rlimit __user *rlim);
asmlinkage long compat_sys_getrlimit(unsigned int resource,
struct compat_rlimit __user *rlim);
asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru);
asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
unsigned int len,
compat_ulong_t __user *user_mask_ptr);
asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid,
unsigned int len,
compat_ulong_t __user *user_mask_ptr);
asmlinkage long compat_sys_timer_create(clockid_t which_clock,
struct compat_sigevent __user *timer_event_spec,
timer_t __user *created_timer_id);
asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags,
struct compat_itimerspec __user *new,
struct compat_itimerspec __user *old);
asmlinkage long compat_sys_timer_gettime(timer_t timer_id,
struct compat_itimerspec __user *setting);
asmlinkage long compat_sys_clock_settime(clockid_t which_clock,
struct compat_timespec __user *tp);
asmlinkage long compat_sys_clock_gettime(clockid_t which_clock,
struct compat_timespec __user *tp);
asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock,
struct compat_timex __user *tp);
asmlinkage long compat_sys_clock_getres(clockid_t which_clock,
struct compat_timespec __user *tp);
asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
struct compat_timespec __user *rqtp,
struct compat_timespec __user *rmtp);
asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
struct compat_siginfo __user *uinfo,
struct compat_timespec __user *uts, compat_size_t sigsetsize);
asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset,
compat_size_t sigsetsize);
asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
unsigned long arg);
asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
struct compat_timespec __user *utime, u32 __user *uaddr2,
u32 val3);
asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
char __user *optval, int __user *optlen);
asmlinkage long compat_sys_kexec_load(unsigned long entry,
unsigned long nr_segments,
struct compat_kexec_segment __user *,
unsigned long flags);
asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes,
const struct compat_mq_attr __user *u_mqstat,
struct compat_mq_attr __user *u_omqstat);
asmlinkage long compat_sys_mq_notify(mqd_t mqdes,
const struct compat_sigevent __user *u_notification);
asmlinkage long compat_sys_mq_open(const char __user *u_name,
int oflag, compat_mode_t mode,
struct compat_mq_attr __user *u_attr);
asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes,
const char __user *u_msg_ptr,
size_t msg_len, unsigned int msg_prio,
const struct compat_timespec __user *u_abs_timeout);
asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes,
char __user *u_msg_ptr,
size_t msg_len, unsigned int __user *u_msg_prio,
const struct compat_timespec __user *u_abs_timeout);
asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args);
extern ssize_t compat_rw_copy_check_uvector(int type,
const struct compat_iovec __user *uvector,
unsigned long nr_segs,
unsigned long fast_segs, struct iovec *fast_pointer,
Cross Memory Attach The basic idea behind cross memory attach is to allow MPI programs doing intra-node communication to do a single copy of the message rather than a double copy of the message via shared memory. The following patch attempts to achieve this by allowing a destination process, given an address and size from a source process, to copy memory directly from the source process into its own address space via a system call. There is also a symmetrical ability to copy from the current process's address space into a destination process's address space. - Use of /proc/pid/mem has been considered, but there are issues with using it: - Does not allow for specifying iovecs for both src and dest, assuming preadv or pwritev was implemented either the area read from or written to would need to be contiguous. - Currently mem_read allows only processes who are currently ptrace'ing the target and are still able to ptrace the target to read from the target. This check could possibly be moved to the open call, but its not clear exactly what race this restriction is stopping (reason appears to have been lost) - Having to send the fd of /proc/self/mem via SCM_RIGHTS on unix domain socket is a bit ugly from a userspace point of view, especially when you may have hundreds if not (eventually) thousands of processes that all need to do this with each other - Doesn't allow for some future use of the interface we would like to consider adding in the future (see below) - Interestingly reading from /proc/pid/mem currently actually involves two copies! (But this could be fixed pretty easily) As mentioned previously use of vmsplice instead was considered, but has problems. Since you need the reader and writer working co-operatively if the pipe is not drained then you block. Which requires some wrapping to do non blocking on the send side or polling on the receive. In all to all communication it requires ordering otherwise you can deadlock. And in the example of many MPI tasks writing to one MPI task vmsplice serialises the copying. There are some cases of MPI collectives where even a single copy interface does not get us the performance gain we could. For example in an MPI_Reduce rather than copy the data from the source we would like to instead use it directly in a mathops (say the reduce is doing a sum) as this would save us doing a copy. We don't need to keep a copy of the data from the source. I haven't implemented this, but I think this interface could in the future do all this through the use of the flags - eg could specify the math operation and type and the kernel rather than just copying the data would apply the specified operation between the source and destination and store it in the destination. Although we don't have a "second user" of the interface (though I've had some nibbles from people who may be interested in using it for intra process messaging which is not MPI). This interface is something which hardware vendors are already doing for their custom drivers to implement fast local communication. And so in addition to this being useful for OpenMPI it would mean the driver maintainers don't have to fix things up when the mm changes. There was some discussion about how much faster a true zero copy would go. Here's a link back to the email with some testing I did on that: http://marc.info/?l=linux-mm&m=130105930902915&w=2 There is a basic man page for the proposed interface here: http://ozlabs.org/~cyeoh/cma/process_vm_readv.txt This has been implemented for x86 and powerpc, other architecture should mainly (I think) just need to add syscall numbers for the process_vm_readv and process_vm_writev. There are 32 bit compatibility versions for 64-bit kernels. For arch maintainers there are some simple tests to be able to quickly verify that the syscalls are working correctly here: http://ozlabs.org/~cyeoh/cma/cma-test-20110718.tgz Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Howells <dhowells@redhat.com> Cc: James Morris <jmorris@namei.org> Cc: <linux-man@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-11-01 08:06:39 +08:00
struct iovec **ret_pointer,
int check_access);
compat: Make compat_alloc_user_space() incorporate the access_ok() compat_alloc_user_space() expects the caller to independently call access_ok() to verify the returned area. A missing call could introduce problems on some architectures. This patch incorporates the access_ok() check into compat_alloc_user_space() and also adds a sanity check on the length. The existing compat_alloc_user_space() implementations are renamed arch_compat_alloc_user_space() and are used as part of the implementation of the new global function. This patch assumes NULL will cause __get_user()/__put_user() to either fail or access userspace on all architectures. This should be followed by checking the return value of compat_access_user_space() for NULL in the callers, at which time the access_ok() in the callers can also be removed. Reported-by: Ben Hawkes <hawkes@sota.gen.nz> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: David S. Miller <davem@davemloft.net> Acked-by: Ingo Molnar <mingo@elte.hu> Acked-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Tony Luck <tony.luck@intel.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: James Bottomley <jejb@parisc-linux.org> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: <stable@kernel.org>
2010-09-08 07:16:18 +08:00
extern void __user *compat_alloc_user_space(unsigned long len);
asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
const struct compat_iovec __user *lvec,
unsigned long liovcnt, const struct compat_iovec __user *rvec,
unsigned long riovcnt, unsigned long flags);
asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
const struct compat_iovec __user *lvec,
unsigned long liovcnt, const struct compat_iovec __user *rvec,
unsigned long riovcnt, unsigned long flags);
#else
#define is_compat_task() (0)
#endif /* CONFIG_COMPAT */
#endif /* _LINUX_COMPAT_H */