2011-11-12 07:55:49 +08:00
|
|
|
#
|
|
|
|
# 32-bit system call numbers and entry vectors
|
|
|
|
#
|
|
|
|
# The format is:
|
|
|
|
# <number> <abi> <name> <entry point> <compat entry point>
|
|
|
|
#
|
|
|
|
# The abi is always "i386" for this file.
|
|
|
|
#
|
|
|
|
0 i386 restart_syscall sys_restart_syscall
|
|
|
|
1 i386 exit sys_exit
|
2015-10-06 08:48:05 +08:00
|
|
|
2 i386 fork sys_fork sys_fork
|
2011-11-12 07:55:49 +08:00
|
|
|
3 i386 read sys_read
|
|
|
|
4 i386 write sys_write
|
|
|
|
5 i386 open sys_open compat_sys_open
|
|
|
|
6 i386 close sys_close
|
|
|
|
7 i386 waitpid sys_waitpid sys32_waitpid
|
|
|
|
8 i386 creat sys_creat
|
|
|
|
9 i386 link sys_link
|
|
|
|
10 i386 unlink sys_unlink
|
2015-10-06 08:48:05 +08:00
|
|
|
11 i386 execve sys_execve compat_sys_execve
|
2011-11-12 07:55:49 +08:00
|
|
|
12 i386 chdir sys_chdir
|
|
|
|
13 i386 time sys_time compat_sys_time
|
|
|
|
14 i386 mknod sys_mknod
|
|
|
|
15 i386 chmod sys_chmod
|
|
|
|
16 i386 lchown sys_lchown16
|
|
|
|
17 i386 break
|
|
|
|
18 i386 oldstat sys_stat
|
2013-02-24 23:52:26 +08:00
|
|
|
19 i386 lseek sys_lseek compat_sys_lseek
|
2011-11-12 07:55:49 +08:00
|
|
|
20 i386 getpid sys_getpid
|
|
|
|
21 i386 mount sys_mount compat_sys_mount
|
|
|
|
22 i386 umount sys_oldumount
|
|
|
|
23 i386 setuid sys_setuid16
|
|
|
|
24 i386 getuid sys_getuid16
|
|
|
|
25 i386 stime sys_stime compat_sys_stime
|
|
|
|
26 i386 ptrace sys_ptrace compat_sys_ptrace
|
|
|
|
27 i386 alarm sys_alarm
|
|
|
|
28 i386 oldfstat sys_fstat
|
|
|
|
29 i386 pause sys_pause
|
|
|
|
30 i386 utime sys_utime compat_sys_utime
|
|
|
|
31 i386 stty
|
|
|
|
32 i386 gtty
|
|
|
|
33 i386 access sys_access
|
|
|
|
34 i386 nice sys_nice
|
|
|
|
35 i386 ftime
|
|
|
|
36 i386 sync sys_sync
|
2013-02-25 03:00:48 +08:00
|
|
|
37 i386 kill sys_kill
|
2011-11-12 07:55:49 +08:00
|
|
|
38 i386 rename sys_rename
|
|
|
|
39 i386 mkdir sys_mkdir
|
|
|
|
40 i386 rmdir sys_rmdir
|
|
|
|
41 i386 dup sys_dup
|
|
|
|
42 i386 pipe sys_pipe
|
|
|
|
43 i386 times sys_times compat_sys_times
|
|
|
|
44 i386 prof
|
|
|
|
45 i386 brk sys_brk
|
|
|
|
46 i386 setgid sys_setgid16
|
|
|
|
47 i386 getgid sys_getgid16
|
|
|
|
48 i386 signal sys_signal
|
|
|
|
49 i386 geteuid sys_geteuid16
|
|
|
|
50 i386 getegid sys_getegid16
|
|
|
|
51 i386 acct sys_acct
|
|
|
|
52 i386 umount2 sys_umount
|
|
|
|
53 i386 lock
|
|
|
|
54 i386 ioctl sys_ioctl compat_sys_ioctl
|
|
|
|
55 i386 fcntl sys_fcntl compat_sys_fcntl64
|
|
|
|
56 i386 mpx
|
|
|
|
57 i386 setpgid sys_setpgid
|
|
|
|
58 i386 ulimit
|
|
|
|
59 i386 oldolduname sys_olduname
|
|
|
|
60 i386 umask sys_umask
|
|
|
|
61 i386 chroot sys_chroot
|
|
|
|
62 i386 ustat sys_ustat compat_sys_ustat
|
|
|
|
63 i386 dup2 sys_dup2
|
|
|
|
64 i386 getppid sys_getppid
|
|
|
|
65 i386 getpgrp sys_getpgrp
|
|
|
|
66 i386 setsid sys_setsid
|
2012-12-26 08:14:55 +08:00
|
|
|
67 i386 sigaction sys_sigaction compat_sys_sigaction
|
2011-11-12 07:55:49 +08:00
|
|
|
68 i386 sgetmask sys_sgetmask
|
|
|
|
69 i386 ssetmask sys_ssetmask
|
|
|
|
70 i386 setreuid sys_setreuid16
|
|
|
|
71 i386 setregid sys_setregid16
|
2012-12-26 05:09:20 +08:00
|
|
|
72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
|
2011-11-12 07:55:49 +08:00
|
|
|
73 i386 sigpending sys_sigpending compat_sys_sigpending
|
|
|
|
74 i386 sethostname sys_sethostname
|
|
|
|
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
|
|
|
|
76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
|
|
|
|
77 i386 getrusage sys_getrusage compat_sys_getrusage
|
|
|
|
78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
|
|
|
|
79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
|
|
|
|
80 i386 getgroups sys_getgroups16
|
|
|
|
81 i386 setgroups sys_setgroups16
|
|
|
|
82 i386 select sys_old_select compat_sys_old_select
|
|
|
|
83 i386 symlink sys_symlink
|
|
|
|
84 i386 oldlstat sys_lstat
|
|
|
|
85 i386 readlink sys_readlink
|
|
|
|
86 i386 uselib sys_uselib
|
|
|
|
87 i386 swapon sys_swapon
|
|
|
|
88 i386 reboot sys_reboot
|
|
|
|
89 i386 readdir sys_old_readdir compat_sys_old_readdir
|
|
|
|
90 i386 mmap sys_old_mmap sys32_mmap
|
|
|
|
91 i386 munmap sys_munmap
|
2013-02-25 02:49:08 +08:00
|
|
|
92 i386 truncate sys_truncate compat_sys_truncate
|
|
|
|
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
|
2011-11-12 07:55:49 +08:00
|
|
|
94 i386 fchmod sys_fchmod
|
|
|
|
95 i386 fchown sys_fchown16
|
|
|
|
96 i386 getpriority sys_getpriority
|
|
|
|
97 i386 setpriority sys_setpriority
|
|
|
|
98 i386 profil
|
|
|
|
99 i386 statfs sys_statfs compat_sys_statfs
|
|
|
|
100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
|
|
|
|
101 i386 ioperm sys_ioperm
|
|
|
|
102 i386 socketcall sys_socketcall compat_sys_socketcall
|
|
|
|
103 i386 syslog sys_syslog
|
|
|
|
104 i386 setitimer sys_setitimer compat_sys_setitimer
|
|
|
|
105 i386 getitimer sys_getitimer compat_sys_getitimer
|
|
|
|
106 i386 stat sys_newstat compat_sys_newstat
|
|
|
|
107 i386 lstat sys_newlstat compat_sys_newlstat
|
|
|
|
108 i386 fstat sys_newfstat compat_sys_newfstat
|
|
|
|
109 i386 olduname sys_uname
|
2012-11-20 11:00:52 +08:00
|
|
|
110 i386 iopl sys_iopl
|
2011-11-12 07:55:49 +08:00
|
|
|
111 i386 vhangup sys_vhangup
|
|
|
|
112 i386 idle
|
2015-03-04 11:31:34 +08:00
|
|
|
113 i386 vm86old sys_vm86old sys_ni_syscall
|
2011-11-12 07:55:49 +08:00
|
|
|
114 i386 wait4 sys_wait4 compat_sys_wait4
|
|
|
|
115 i386 swapoff sys_swapoff
|
|
|
|
116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
|
2013-01-22 12:15:25 +08:00
|
|
|
117 i386 ipc sys_ipc compat_sys_ipc
|
2011-11-12 07:55:49 +08:00
|
|
|
118 i386 fsync sys_fsync
|
2015-10-06 08:48:05 +08:00
|
|
|
119 i386 sigreturn sys_sigreturn sys32_sigreturn
|
2012-10-23 10:34:11 +08:00
|
|
|
120 i386 clone sys_clone stub32_clone
|
2011-11-12 07:55:49 +08:00
|
|
|
121 i386 setdomainname sys_setdomainname
|
|
|
|
122 i386 uname sys_newuname
|
|
|
|
123 i386 modify_ldt sys_modify_ldt
|
|
|
|
124 i386 adjtimex sys_adjtimex compat_sys_adjtimex
|
2013-02-25 03:00:48 +08:00
|
|
|
125 i386 mprotect sys_mprotect
|
2011-11-12 07:55:49 +08:00
|
|
|
126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
|
|
|
|
127 i386 create_module
|
|
|
|
128 i386 init_module sys_init_module
|
|
|
|
129 i386 delete_module sys_delete_module
|
|
|
|
130 i386 get_kernel_syms
|
|
|
|
131 i386 quotactl sys_quotactl sys32_quotactl
|
|
|
|
132 i386 getpgid sys_getpgid
|
|
|
|
133 i386 fchdir sys_fchdir
|
|
|
|
134 i386 bdflush sys_bdflush
|
|
|
|
135 i386 sysfs sys_sysfs
|
|
|
|
136 i386 personality sys_personality
|
|
|
|
137 i386 afs_syscall
|
|
|
|
138 i386 setfsuid sys_setfsuid16
|
|
|
|
139 i386 setfsgid sys_setfsgid16
|
|
|
|
140 i386 _llseek sys_llseek
|
|
|
|
141 i386 getdents sys_getdents compat_sys_getdents
|
|
|
|
142 i386 _newselect sys_select compat_sys_select
|
|
|
|
143 i386 flock sys_flock
|
|
|
|
144 i386 msync sys_msync
|
|
|
|
145 i386 readv sys_readv compat_sys_readv
|
|
|
|
146 i386 writev sys_writev compat_sys_writev
|
|
|
|
147 i386 getsid sys_getsid
|
|
|
|
148 i386 fdatasync sys_fdatasync
|
|
|
|
149 i386 _sysctl sys_sysctl compat_sys_sysctl
|
|
|
|
150 i386 mlock sys_mlock
|
|
|
|
151 i386 munlock sys_munlock
|
|
|
|
152 i386 mlockall sys_mlockall
|
|
|
|
153 i386 munlockall sys_munlockall
|
|
|
|
154 i386 sched_setparam sys_sched_setparam
|
|
|
|
155 i386 sched_getparam sys_sched_getparam
|
|
|
|
156 i386 sched_setscheduler sys_sched_setscheduler
|
|
|
|
157 i386 sched_getscheduler sys_sched_getscheduler
|
|
|
|
158 i386 sched_yield sys_sched_yield
|
|
|
|
159 i386 sched_get_priority_max sys_sched_get_priority_max
|
|
|
|
160 i386 sched_get_priority_min sys_sched_get_priority_min
|
2012-12-26 06:19:57 +08:00
|
|
|
161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
|
2011-11-12 07:55:49 +08:00
|
|
|
162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
|
|
|
|
163 i386 mremap sys_mremap
|
|
|
|
164 i386 setresuid sys_setresuid16
|
|
|
|
165 i386 getresuid sys_getresuid16
|
2015-03-04 11:31:34 +08:00
|
|
|
166 i386 vm86 sys_vm86 sys_ni_syscall
|
2011-11-12 07:55:49 +08:00
|
|
|
167 i386 query_module
|
|
|
|
168 i386 poll sys_poll
|
|
|
|
169 i386 nfsservctl
|
|
|
|
170 i386 setresgid sys_setresgid16
|
|
|
|
171 i386 getresgid sys_getresgid16
|
|
|
|
172 i386 prctl sys_prctl
|
2015-10-06 08:48:05 +08:00
|
|
|
173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
|
2012-12-26 07:42:26 +08:00
|
|
|
174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
|
2012-02-20 01:48:01 +08:00
|
|
|
175 i386 rt_sigprocmask sys_rt_sigprocmask
|
2012-12-26 03:46:17 +08:00
|
|
|
176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
|
2011-11-12 07:55:49 +08:00
|
|
|
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
|
2012-12-26 04:26:55 +08:00
|
|
|
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
|
2011-11-12 07:55:49 +08:00
|
|
|
179 i386 rt_sigsuspend sys_rt_sigsuspend
|
|
|
|
180 i386 pread64 sys_pread64 sys32_pread
|
|
|
|
181 i386 pwrite64 sys_pwrite64 sys32_pwrite
|
|
|
|
182 i386 chown sys_chown16
|
|
|
|
183 i386 getcwd sys_getcwd
|
|
|
|
184 i386 capget sys_capget
|
|
|
|
185 i386 capset sys_capset
|
2012-12-15 03:47:53 +08:00
|
|
|
186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
|
2013-02-24 15:17:03 +08:00
|
|
|
187 i386 sendfile sys_sendfile compat_sys_sendfile
|
2011-11-12 07:55:49 +08:00
|
|
|
188 i386 getpmsg
|
|
|
|
189 i386 putpmsg
|
2015-10-06 08:48:05 +08:00
|
|
|
190 i386 vfork sys_vfork sys_vfork
|
2011-11-12 07:55:49 +08:00
|
|
|
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
|
|
|
|
192 i386 mmap2 sys_mmap_pgoff
|
|
|
|
193 i386 truncate64 sys_truncate64 sys32_truncate64
|
|
|
|
194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
|
|
|
|
195 i386 stat64 sys_stat64 sys32_stat64
|
|
|
|
196 i386 lstat64 sys_lstat64 sys32_lstat64
|
|
|
|
197 i386 fstat64 sys_fstat64 sys32_fstat64
|
|
|
|
198 i386 lchown32 sys_lchown
|
|
|
|
199 i386 getuid32 sys_getuid
|
|
|
|
200 i386 getgid32 sys_getgid
|
|
|
|
201 i386 geteuid32 sys_geteuid
|
|
|
|
202 i386 getegid32 sys_getegid
|
|
|
|
203 i386 setreuid32 sys_setreuid
|
|
|
|
204 i386 setregid32 sys_setregid
|
|
|
|
205 i386 getgroups32 sys_getgroups
|
|
|
|
206 i386 setgroups32 sys_setgroups
|
|
|
|
207 i386 fchown32 sys_fchown
|
|
|
|
208 i386 setresuid32 sys_setresuid
|
|
|
|
209 i386 getresuid32 sys_getresuid
|
|
|
|
210 i386 setresgid32 sys_setresgid
|
|
|
|
211 i386 getresgid32 sys_getresgid
|
|
|
|
212 i386 chown32 sys_chown
|
|
|
|
213 i386 setuid32 sys_setuid
|
|
|
|
214 i386 setgid32 sys_setgid
|
|
|
|
215 i386 setfsuid32 sys_setfsuid
|
|
|
|
216 i386 setfsgid32 sys_setfsgid
|
|
|
|
217 i386 pivot_root sys_pivot_root
|
|
|
|
218 i386 mincore sys_mincore
|
|
|
|
219 i386 madvise sys_madvise
|
2017-04-09 02:34:51 +08:00
|
|
|
220 i386 getdents64 sys_getdents64
|
2011-11-12 07:55:49 +08:00
|
|
|
221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
|
|
|
|
# 222 is unused
|
|
|
|
# 223 is unused
|
|
|
|
224 i386 gettid sys_gettid
|
|
|
|
225 i386 readahead sys_readahead sys32_readahead
|
|
|
|
226 i386 setxattr sys_setxattr
|
|
|
|
227 i386 lsetxattr sys_lsetxattr
|
|
|
|
228 i386 fsetxattr sys_fsetxattr
|
|
|
|
229 i386 getxattr sys_getxattr
|
|
|
|
230 i386 lgetxattr sys_lgetxattr
|
|
|
|
231 i386 fgetxattr sys_fgetxattr
|
|
|
|
232 i386 listxattr sys_listxattr
|
|
|
|
233 i386 llistxattr sys_llistxattr
|
|
|
|
234 i386 flistxattr sys_flistxattr
|
|
|
|
235 i386 removexattr sys_removexattr
|
|
|
|
236 i386 lremovexattr sys_lremovexattr
|
|
|
|
237 i386 fremovexattr sys_fremovexattr
|
|
|
|
238 i386 tkill sys_tkill
|
|
|
|
239 i386 sendfile64 sys_sendfile64
|
|
|
|
240 i386 futex sys_futex compat_sys_futex
|
|
|
|
241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
|
|
|
|
242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
|
|
|
|
243 i386 set_thread_area sys_set_thread_area
|
|
|
|
244 i386 get_thread_area sys_get_thread_area
|
|
|
|
245 i386 io_setup sys_io_setup compat_sys_io_setup
|
|
|
|
246 i386 io_destroy sys_io_destroy
|
|
|
|
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
|
|
|
|
248 i386 io_submit sys_io_submit compat_sys_io_submit
|
|
|
|
249 i386 io_cancel sys_io_cancel
|
|
|
|
250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
|
|
|
|
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
|
|
|
|
252 i386 exit_group sys_exit_group
|
2013-02-26 07:42:04 +08:00
|
|
|
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
|
2011-11-12 07:55:49 +08:00
|
|
|
254 i386 epoll_create sys_epoll_create
|
|
|
|
255 i386 epoll_ctl sys_epoll_ctl
|
|
|
|
256 i386 epoll_wait sys_epoll_wait
|
|
|
|
257 i386 remap_file_pages sys_remap_file_pages
|
|
|
|
258 i386 set_tid_address sys_set_tid_address
|
|
|
|
259 i386 timer_create sys_timer_create compat_sys_timer_create
|
|
|
|
260 i386 timer_settime sys_timer_settime compat_sys_timer_settime
|
|
|
|
261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime
|
|
|
|
262 i386 timer_getoverrun sys_timer_getoverrun
|
|
|
|
263 i386 timer_delete sys_timer_delete
|
|
|
|
264 i386 clock_settime sys_clock_settime compat_sys_clock_settime
|
|
|
|
265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime
|
|
|
|
266 i386 clock_getres sys_clock_getres compat_sys_clock_getres
|
|
|
|
267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
|
|
|
|
268 i386 statfs64 sys_statfs64 compat_sys_statfs64
|
|
|
|
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
|
|
|
|
270 i386 tgkill sys_tgkill
|
|
|
|
271 i386 utimes sys_utimes compat_sys_utimes
|
|
|
|
272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
|
|
|
|
273 i386 vserver
|
|
|
|
274 i386 mbind sys_mbind
|
|
|
|
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
|
|
|
|
276 i386 set_mempolicy sys_set_mempolicy
|
|
|
|
277 i386 mq_open sys_mq_open compat_sys_mq_open
|
|
|
|
278 i386 mq_unlink sys_mq_unlink
|
|
|
|
279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
|
|
|
|
280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
|
|
|
|
281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
|
2012-03-22 05:50:08 +08:00
|
|
|
282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
|
2011-11-12 07:55:49 +08:00
|
|
|
283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
|
|
|
|
284 i386 waitid sys_waitid compat_sys_waitid
|
|
|
|
# 285 sys_setaltroot
|
|
|
|
286 i386 add_key sys_add_key
|
|
|
|
287 i386 request_key sys_request_key
|
2016-07-27 18:42:38 +08:00
|
|
|
288 i386 keyctl sys_keyctl compat_sys_keyctl
|
2011-11-12 07:55:49 +08:00
|
|
|
289 i386 ioprio_set sys_ioprio_set
|
|
|
|
290 i386 ioprio_get sys_ioprio_get
|
|
|
|
291 i386 inotify_init sys_inotify_init
|
|
|
|
292 i386 inotify_add_watch sys_inotify_add_watch
|
|
|
|
293 i386 inotify_rm_watch sys_inotify_rm_watch
|
|
|
|
294 i386 migrate_pages sys_migrate_pages
|
|
|
|
295 i386 openat sys_openat compat_sys_openat
|
|
|
|
296 i386 mkdirat sys_mkdirat
|
|
|
|
297 i386 mknodat sys_mknodat
|
|
|
|
298 i386 fchownat sys_fchownat
|
|
|
|
299 i386 futimesat sys_futimesat compat_sys_futimesat
|
|
|
|
300 i386 fstatat64 sys_fstatat64 sys32_fstatat
|
|
|
|
301 i386 unlinkat sys_unlinkat
|
|
|
|
302 i386 renameat sys_renameat
|
|
|
|
303 i386 linkat sys_linkat
|
|
|
|
304 i386 symlinkat sys_symlinkat
|
|
|
|
305 i386 readlinkat sys_readlinkat
|
|
|
|
306 i386 fchmodat sys_fchmodat
|
|
|
|
307 i386 faccessat sys_faccessat
|
|
|
|
308 i386 pselect6 sys_pselect6 compat_sys_pselect6
|
|
|
|
309 i386 ppoll sys_ppoll compat_sys_ppoll
|
|
|
|
310 i386 unshare sys_unshare
|
|
|
|
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
|
|
|
|
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
|
|
|
|
313 i386 splice sys_splice
|
|
|
|
314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
|
|
|
|
315 i386 tee sys_tee
|
|
|
|
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
|
|
|
|
317 i386 move_pages sys_move_pages compat_sys_move_pages
|
|
|
|
318 i386 getcpu sys_getcpu
|
|
|
|
319 i386 epoll_pwait sys_epoll_pwait
|
|
|
|
320 i386 utimensat sys_utimensat compat_sys_utimensat
|
|
|
|
321 i386 signalfd sys_signalfd compat_sys_signalfd
|
|
|
|
322 i386 timerfd_create sys_timerfd_create
|
|
|
|
323 i386 eventfd sys_eventfd
|
|
|
|
324 i386 fallocate sys_fallocate sys32_fallocate
|
|
|
|
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
|
|
|
|
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
|
|
|
|
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
|
|
|
|
328 i386 eventfd2 sys_eventfd2
|
|
|
|
329 i386 epoll_create1 sys_epoll_create1
|
|
|
|
330 i386 dup3 sys_dup3
|
|
|
|
331 i386 pipe2 sys_pipe2
|
|
|
|
332 i386 inotify_init1 sys_inotify_init1
|
|
|
|
333 i386 preadv sys_preadv compat_sys_preadv
|
|
|
|
334 i386 pwritev sys_pwritev compat_sys_pwritev
|
|
|
|
335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
|
|
|
|
336 i386 perf_event_open sys_perf_event_open
|
|
|
|
337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg
|
|
|
|
338 i386 fanotify_init sys_fanotify_init
|
2013-03-06 09:10:59 +08:00
|
|
|
339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
|
2011-11-12 07:55:49 +08:00
|
|
|
340 i386 prlimit64 sys_prlimit64
|
|
|
|
341 i386 name_to_handle_at sys_name_to_handle_at
|
|
|
|
342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
|
|
|
|
343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
|
|
|
|
344 i386 syncfs sys_syncfs
|
|
|
|
345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
|
|
|
|
346 i386 setns sys_setns
|
|
|
|
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
|
|
|
|
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
|
2012-06-01 07:26:44 +08:00
|
|
|
349 i386 kcmp sys_kcmp
|
module: add syscall to load module from fd
As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.
Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.
If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on. In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.
This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
2012-10-16 05:01:07 +08:00
|
|
|
350 i386 finit_module sys_finit_module
|
sched: Add new scheduler syscalls to support an extended scheduling parameters ABI
Add the syscalls needed for supporting scheduling algorithms
with extended scheduling parameters (e.g., SCHED_DEADLINE).
In general, it makes it possible to specify a periodic/sporadic task
that executes for a given amount of runtime at each instance, and is
scheduled according to the urgency of its own timing constraints,
i.e.:
- a (maximum/typical) instance execution time,
- a minimum interval between consecutive instances,
- a time constraint by which each instance must be completed.
Thus, both the data structure that holds the scheduling parameters of
the tasks and the system calls dealing with it must be extended.
Unfortunately, modifying the existing struct sched_param would break
the ABI and result in potentially serious compatibility issues with
legacy binaries.
For these reasons, this patch:
- defines the new struct sched_attr, containing all the fields
that are necessary for specifying a task in the computational
model described above;
- defines and implements the new scheduling related syscalls that
manipulate it, i.e., sched_setattr() and sched_getattr().
Syscalls are introduced for x86 (32 and 64 bits) and ARM only, as a
proof of concept and for developing and testing purposes. Making them
available on other architectures is straightforward.
Since no "user" for these new parameters is introduced in this patch,
the implementation of the new system calls is just identical to their
already existing counterpart. Future patches that implement scheduling
policies able to exploit the new data structure must also take care of
modifying the sched_*attr() calls accordingly with their own purposes.
Signed-off-by: Dario Faggioli <raistlin@linux.it>
[ Rewrote to use sched_attr. ]
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
[ Removed sched_setscheduler2() for now. ]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-3-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2013-11-07 21:43:36 +08:00
|
|
|
351 i386 sched_setattr sys_sched_setattr
|
|
|
|
352 i386 sched_getattr sys_sched_getattr
|
2014-04-11 18:25:37 +08:00
|
|
|
353 i386 renameat2 sys_renameat2
|
2014-06-26 07:08:24 +08:00
|
|
|
354 i386 seccomp sys_seccomp
|
random: introduce getrandom(2) system call
The getrandom(2) system call was requested by the LibreSSL Portable
developers. It is analogous to the getentropy(2) system call in
OpenBSD.
The rationale of this system call is to provide resilience against
file descriptor exhaustion attacks, where the attacker consumes all
available file descriptors, forcing the use of the fallback code where
/dev/[u]random is not available. Since the fallback code is often not
well-tested, it is better to eliminate this potential failure mode
entirely.
The other feature provided by this new system call is the ability to
request randomness from the /dev/urandom entropy pool, but to block
until at least 128 bits of entropy has been accumulated in the
/dev/urandom entropy pool. Historically, the emphasis in the
/dev/urandom development has been to ensure that urandom pool is
initialized as quickly as possible after system boot, and preferably
before the init scripts start execution.
This is because changing /dev/urandom reads to block represents an
interface change that could potentially break userspace which is not
acceptable. In practice, on most x86 desktop and server systems, in
general the entropy pool can be initialized before it is needed (and
in modern kernels, we will printk a warning message if not). However,
on an embedded system, this may not be the case. And so with this new
interface, we can provide the functionality of blocking until the
urandom pool has been initialized. Any userspace program which uses
this new functionality must take care to ensure that, if it is used
during the boot process, it will not cause the init scripts or
other portions of the system startup to hang indefinitely.
SYNOPSIS
#include <linux/random.h>
int getrandom(void *buf, size_t buflen, unsigned int flags);
DESCRIPTION
The system call getrandom() fills the buffer pointed to by buf
with up to buflen random bytes which can be used to seed user
space random number generators (i.e., DRBG's) or for other
cryptographic uses. It should not be used for Monte Carlo
simulations or other programs/algorithms which are doing
probabilistic sampling.
If the GRND_RANDOM flags bit is set, then draw from the
/dev/random pool instead of the /dev/urandom pool. The
/dev/random pool is limited based on the entropy that can be
obtained from environmental noise, so if there is insufficient
entropy, the requested number of bytes may not be returned.
If there is no entropy available at all, getrandom(2) will
either block, or return an error with errno set to EAGAIN if
the GRND_NONBLOCK bit is set in flags.
If the GRND_RANDOM bit is not set, then the /dev/urandom pool
will be used. Unlike using read(2) to fetch data from
/dev/urandom, if the urandom pool has not been sufficiently
initialized, getrandom(2) will block (or return -1 with the
errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags).
The getentropy(2) system call in OpenBSD can be emulated using
the following function:
int getentropy(void *buf, size_t buflen)
{
int ret;
if (buflen > 256)
goto failure;
ret = getrandom(buf, buflen, 0);
if (ret < 0)
return ret;
if (ret == buflen)
return 0;
failure:
errno = EIO;
return -1;
}
RETURN VALUE
On success, the number of bytes that was filled in the buf is
returned. This may not be all the bytes requested by the
caller via buflen if insufficient entropy was present in the
/dev/random pool, or if the system call was interrupted by a
signal.
On error, -1 is returned, and errno is set appropriately.
ERRORS
EINVAL An invalid flag was passed to getrandom(2)
EFAULT buf is outside the accessible address space.
EAGAIN The requested entropy was not available, and
getrandom(2) would have blocked if the
GRND_NONBLOCK flag was not set.
EINTR While blocked waiting for entropy, the call was
interrupted by a signal handler; see the description
of how interrupted read(2) calls on "slow" devices
are handled with and without the SA_RESTART flag
in the signal(7) man page.
NOTES
For small requests (buflen <= 256) getrandom(2) will not
return EINTR when reading from the urandom pool once the
entropy pool has been initialized, and it will return all of
the bytes that have been requested. This is the recommended
way to use getrandom(2), and is designed for compatibility
with OpenBSD's getentropy() system call.
However, if you are using GRND_RANDOM, then getrandom(2) may
block until the entropy accounting determines that sufficient
environmental noise has been gathered such that getrandom(2)
will be operating as a NRBG instead of a DRBG for those people
who are working in the NIST SP 800-90 regime. Since it may
block for a long time, these guarantees do *not* apply. The
user may want to interrupt a hanging process using a signal,
so blocking until all of the requested bytes are returned
would be unfriendly.
For this reason, the user of getrandom(2) MUST always check
the return value, in case it returns some error, or if fewer
bytes than requested were returned. In the case of
!GRND_RANDOM and small request, the latter should never
happen, but the careful userspace code (and all crypto code
should be careful) should check for this anyway!
Finally, unless you are doing long-term key generation (and
perhaps not even then), you probably shouldn't be using
GRND_RANDOM. The cryptographic algorithms used for
/dev/urandom are quite conservative, and so should be
sufficient for all purposes. The disadvantage of GRND_RANDOM
is that it can block, and the increased complexity required to
deal with partially fulfilled getrandom(2) requests.
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Zach Brown <zab@zabbo.net>
2014-07-17 16:13:05 +08:00
|
|
|
355 i386 getrandom sys_getrandom
|
shm: add memfd_create() syscall
memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor
that you can pass to mmap(). It can support sealing and avoids any
connection to user-visible mount-points. Thus, it's not subject to quotas
on mounted file-systems, but can be used like malloc()'ed memory, but with
a file-descriptor to it.
memfd_create() returns the raw shmem file, so calls like ftruncate() can
be used to modify the underlying inode. Also calls like fstat() will
return proper information and mark the file as regular file. If you want
sealing, you can specify MFD_ALLOW_SEALING. Otherwise, sealing is not
supported (like on all other regular files).
Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not
subject to a filesystem size limit. It is still properly accounted to
memcg limits, though, and to the same overcommit or no-overcommit
accounting as all user memory.
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-08-09 05:25:29 +08:00
|
|
|
356 i386 memfd_create sys_memfd_create
|
2014-09-26 15:16:58 +08:00
|
|
|
357 i386 bpf sys_bpf
|
2015-10-06 08:48:05 +08:00
|
|
|
358 i386 execveat sys_execveat compat_sys_execveat
|
2015-07-15 06:24:24 +08:00
|
|
|
359 i386 socket sys_socket
|
|
|
|
360 i386 socketpair sys_socketpair
|
|
|
|
361 i386 bind sys_bind
|
|
|
|
362 i386 connect sys_connect
|
|
|
|
363 i386 listen sys_listen
|
|
|
|
364 i386 accept4 sys_accept4
|
|
|
|
365 i386 getsockopt sys_getsockopt compat_sys_getsockopt
|
|
|
|
366 i386 setsockopt sys_setsockopt compat_sys_setsockopt
|
|
|
|
367 i386 getsockname sys_getsockname
|
|
|
|
368 i386 getpeername sys_getpeername
|
|
|
|
369 i386 sendto sys_sendto
|
|
|
|
370 i386 sendmsg sys_sendmsg compat_sys_sendmsg
|
|
|
|
371 i386 recvfrom sys_recvfrom compat_sys_recvfrom
|
|
|
|
372 i386 recvmsg sys_recvmsg compat_sys_recvmsg
|
|
|
|
373 i386 shutdown sys_shutdown
|
2015-09-05 06:46:58 +08:00
|
|
|
374 i386 userfaultfd sys_userfaultfd
|
sys_membarrier(): system-wide memory barrier (generic, x86)
Here is an implementation of a new system call, sys_membarrier(), which
executes a memory barrier on all threads running on the system. It is
implemented by calling synchronize_sched(). It can be used to
distribute the cost of user-space memory barriers asymmetrically by
transforming pairs of memory barriers into pairs consisting of
sys_membarrier() and a compiler barrier. For synchronization primitives
that distinguish between read-side and write-side (e.g. userspace RCU
[1], rwlocks), the read-side can be accelerated significantly by moving
the bulk of the memory barrier overhead to the write-side.
The existing applications of which I am aware that would be improved by
this system call are as follows:
* Through Userspace RCU library (http://urcu.so)
- DNS server (Knot DNS) https://www.knot-dns.cz/
- Network sniffer (http://netsniff-ng.org/)
- Distributed object storage (https://sheepdog.github.io/sheepdog/)
- User-space tracing (http://lttng.org)
- Network storage system (https://www.gluster.org/)
- Virtual routers (https://events.linuxfoundation.org/sites/events/files/slides/DPDK_RCU_0MQ.pdf)
- Financial software (https://lkml.org/lkml/2015/3/23/189)
Those projects use RCU in userspace to increase read-side speed and
scalability compared to locking. Especially in the case of RCU used by
libraries, sys_membarrier can speed up the read-side by moving the bulk of
the memory barrier cost to synchronize_rcu().
* Direct users of sys_membarrier
- core dotnet garbage collector (https://github.com/dotnet/coreclr/issues/198)
Microsoft core dotnet GC developers are planning to use the mprotect()
side-effect of issuing memory barriers through IPIs as a way to implement
Windows FlushProcessWriteBuffers() on Linux. They are referring to
sys_membarrier in their github thread, specifically stating that
sys_membarrier() is what they are looking for.
To explain the benefit of this scheme, let's introduce two example threads:
Thread A (non-frequent, e.g. executing liburcu synchronize_rcu())
Thread B (frequent, e.g. executing liburcu
rcu_read_lock()/rcu_read_unlock())
In a scheme where all smp_mb() in thread A are ordering memory accesses
with respect to smp_mb() present in Thread B, we can change each
smp_mb() within Thread A into calls to sys_membarrier() and each
smp_mb() within Thread B into compiler barriers "barrier()".
Before the change, we had, for each smp_mb() pair:
Thread A Thread B
previous mem accesses previous mem accesses
smp_mb() smp_mb()
following mem accesses following mem accesses
After the change, these pairs become:
Thread A Thread B
prev mem accesses prev mem accesses
sys_membarrier() barrier()
follow mem accesses follow mem accesses
As we can see, there are two possible scenarios: either Thread B memory
accesses do not happen concurrently with Thread A accesses (1), or they
do (2).
1) Non-concurrent Thread A vs Thread B accesses:
Thread A Thread B
prev mem accesses
sys_membarrier()
follow mem accesses
prev mem accesses
barrier()
follow mem accesses
In this case, thread B accesses will be weakly ordered. This is OK,
because at that point, thread A is not particularly interested in
ordering them with respect to its own accesses.
2) Concurrent Thread A vs Thread B accesses
Thread A Thread B
prev mem accesses prev mem accesses
sys_membarrier() barrier()
follow mem accesses follow mem accesses
In this case, thread B accesses, which are ensured to be in program
order thanks to the compiler barrier, will be "upgraded" to full
smp_mb() by synchronize_sched().
* Benchmarks
On Intel Xeon E5405 (8 cores)
(one thread is calling sys_membarrier, the other 7 threads are busy
looping)
1000 non-expedited sys_membarrier calls in 33s = 33 milliseconds/call.
* User-space user of this system call: Userspace RCU library
Both the signal-based and the sys_membarrier userspace RCU schemes
permit us to remove the memory barrier from the userspace RCU
rcu_read_lock() and rcu_read_unlock() primitives, thus significantly
accelerating them. These memory barriers are replaced by compiler
barriers on the read-side, and all matching memory barriers on the
write-side are turned into an invocation of a memory barrier on all
active threads in the process. By letting the kernel perform this
synchronization rather than dumbly sending a signal to every thread of
the process (as we currently do), we diminish the number of unnecessary wake
ups and only issue the memory barriers on active threads. Non-running
threads do not need to execute such barrier anyway, because these are
implied by the scheduler context switches.
Results in liburcu:
Operations in 10s, 6 readers, 2 writers:
memory barriers in reader: 1701557485 reads, 2202847 writes
signal-based scheme: 9830061167 reads, 6700 writes
sys_membarrier: 9952759104 reads, 425 writes
sys_membarrier (dyn. check): 7970328887 reads, 425 writes
The dynamic sys_membarrier availability check adds some overhead to
the read-side compared to the signal-based scheme, but besides that,
sys_membarrier slightly outperforms the signal-based scheme. However,
this non-expedited sys_membarrier implementation has a much slower grace
period than signal and memory barrier schemes.
Besides diminishing the number of wake-ups, one major advantage of the
membarrier system call over the signal-based scheme is that it does not
need to reserve a signal. This plays much more nicely with libraries,
and with processes injected into for tracing purposes, for which we
cannot expect that signals will be unused by the application.
An expedited version of this system call can be added later on to speed
up the grace period. Its implementation will likely depend on reading
the cpu_curr()->mm without holding each CPU's rq lock.
This patch adds the system call to x86 and to asm-generic.
[1] http://urcu.so
membarrier(2) man page:
MEMBARRIER(2) Linux Programmer's Manual MEMBARRIER(2)
NAME
membarrier - issue memory barriers on a set of threads
SYNOPSIS
#include <linux/membarrier.h>
int membarrier(int cmd, int flags);
DESCRIPTION
The cmd argument is one of the following:
MEMBARRIER_CMD_QUERY
Query the set of supported commands. It returns a bitmask of
supported commands.
MEMBARRIER_CMD_SHARED
Execute a memory barrier on all threads running on the system.
Upon return from system call, the caller thread is ensured that
all running threads have passed through a state where all memory
accesses to user-space addresses match program order between
entry to and return from the system call (non-running threads
are de facto in such a state). This covers threads from all processes
running on the system. This command returns 0.
The flags argument needs to be 0. For future extensions.
All memory accesses performed in program order from each targeted
thread are guaranteed to be ordered with respect to sys_membarrier(). If
we use the semantic "barrier()" to represent a compiler barrier forcing
memory accesses to be performed in program order across the barrier,
and smp_mb() to represent explicit memory barriers forcing full memory
ordering across the barrier, we have the following ordering table for
each pair of barrier(), sys_membarrier() and smp_mb():
The pair ordering is detailed as (O: ordered, X: not ordered):
barrier() smp_mb() sys_membarrier()
barrier() X X O
smp_mb() X O O
sys_membarrier() O O O
RETURN VALUE
On success, these system calls return zero. On error, -1 is returned,
and errno is set appropriately. For a given command, with flags
argument set to 0, this system call is guaranteed to always return the
same value until reboot.
ERRORS
ENOSYS System call is not implemented.
EINVAL Invalid arguments.
Linux 2015-04-15 MEMBARRIER(2)
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Nicholas Miell <nmiell@comcast.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Pranith Kumar <bobby.prani@gmail.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-12 04:07:39 +08:00
|
|
|
375 i386 membarrier sys_membarrier
|
2015-11-06 10:51:33 +08:00
|
|
|
376 i386 mlock2 sys_mlock2
|
2015-11-11 05:53:31 +08:00
|
|
|
377 i386 copy_file_range sys_copy_file_range
|
2016-05-11 16:48:17 +08:00
|
|
|
378 i386 preadv2 sys_preadv2 compat_sys_preadv2
|
|
|
|
379 i386 pwritev2 sys_pwritev2 compat_sys_pwritev2
|
2016-07-30 00:30:17 +08:00
|
|
|
380 i386 pkey_mprotect sys_pkey_mprotect
|
|
|
|
381 i386 pkey_alloc sys_pkey_alloc
|
|
|
|
382 i386 pkey_free sys_pkey_free
|
statx: Add a system call to make enhanced file info available
Add a system call to make extended file information available, including
file creation and some attribute flags where available through the
underlying filesystem.
The getattr inode operation is altered to take two additional arguments: a
u32 request_mask and an unsigned int flags that indicate the
synchronisation mode. This change is propagated to the vfs_getattr*()
function.
Functions like vfs_stat() are now inline wrappers around new functions
vfs_statx() and vfs_statx_fd() to reduce stack usage.
========
OVERVIEW
========
The idea was initially proposed as a set of xattrs that could be retrieved
with getxattr(), but the general preference proved to be for a new syscall
with an extended stat structure.
A number of requests were gathered for features to be included. The
following have been included:
(1) Make the fields a consistent size on all arches and make them large.
(2) Spare space, request flags and information flags are provided for
future expansion.
(3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an
__s64).
(4) Creation time: The SMB protocol carries the creation time, which could
be exported by Samba, which will in turn help CIFS make use of
FS-Cache as that can be used for coherency data (stx_btime).
This is also specified in NFSv4 as a recommended attribute and could
be exported by NFSD [Steve French].
(5) Lightweight stat: Ask for just those details of interest, and allow a
netfs (such as NFS) to approximate anything not of interest, possibly
without going to the server [Trond Myklebust, Ulrich Drepper, Andreas
Dilger] (AT_STATX_DONT_SYNC).
(6) Heavyweight stat: Force a netfs to go to the server, even if it thinks
its cached attributes are up to date [Trond Myklebust]
(AT_STATX_FORCE_SYNC).
And the following have been left out for future extension:
(7) Data version number: Could be used by userspace NFS servers [Aneesh
Kumar].
Can also be used to modify fill_post_wcc() in NFSD which retrieves
i_version directly, but has just called vfs_getattr(). It could get
it from the kstat struct if it used vfs_xgetattr() instead.
(There's disagreement on the exact semantics of a single field, since
not all filesystems do this the same way).
(8) BSD stat compatibility: Including more fields from the BSD stat such
as creation time (st_btime) and inode generation number (st_gen)
[Jeremy Allison, Bernd Schubert].
(9) Inode generation number: Useful for FUSE and userspace NFS servers
[Bernd Schubert].
(This was asked for but later deemed unnecessary with the
open-by-handle capability available and caused disagreement as to
whether it's a security hole or not).
(10) Extra coherency data may be useful in making backups [Andreas Dilger].
(No particular data were offered, but things like last backup
timestamp, the data version number and the DOS archive bit would come
into this category).
(11) Allow the filesystem to indicate what it can/cannot provide: A
filesystem can now say it doesn't support a standard stat feature if
that isn't available, so if, for instance, inode numbers or UIDs don't
exist or are fabricated locally...
(This requires a separate system call - I have an fsinfo() call idea
for this).
(12) Store a 16-byte volume ID in the superblock that can be returned in
struct xstat [Steve French].
(Deferred to fsinfo).
(13) Include granularity fields in the time data to indicate the
granularity of each of the times (NFSv4 time_delta) [Steve French].
(Deferred to fsinfo).
(14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags.
Note that the Linux IOC flags are a mess and filesystems such as Ext4
define flags that aren't in linux/fs.h, so translation in the kernel
may be a necessity (or, possibly, we provide the filesystem type too).
(Some attributes are made available in stx_attributes, but the general
feeling was that the IOC flags were too ext[234]-specific and shouldn't
be exposed through statx this way).
(15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer,
Michael Kerrisk].
(Deferred, probably to fsinfo. Finding out if there's an ACL or
seclabel might require extra filesystem operations).
(16) Femtosecond-resolution timestamps [Dave Chinner].
(A __reserved field has been left in the statx_timestamp struct for
this - if there proves to be a need).
(17) A set multiple attributes syscall to go with this.
===============
NEW SYSTEM CALL
===============
The new system call is:
int ret = statx(int dfd,
const char *filename,
unsigned int flags,
unsigned int mask,
struct statx *buffer);
The dfd, filename and flags parameters indicate the file to query, in a
similar way to fstatat(). There is no equivalent of lstat() as that can be
emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is
also no equivalent of fstat() as that can be emulated by passing a NULL
filename to statx() with the fd of interest in dfd.
Whether or not statx() synchronises the attributes with the backing store
can be controlled by OR'ing a value into the flags argument (this typically
only affects network filesystems):
(1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this
respect.
(2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise
its attributes with the server - which might require data writeback to
occur to get the timestamps correct.
(3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a
network filesystem. The resulting values should be considered
approximate.
mask is a bitmask indicating the fields in struct statx that are of
interest to the caller. The user should set this to STATX_BASIC_STATS to
get the basic set returned by stat(). It should be noted that asking for
more information may entail extra I/O operations.
buffer points to the destination for the data. This must be 256 bytes in
size.
======================
MAIN ATTRIBUTES RECORD
======================
The following structures are defined in which to return the main attribute
set:
struct statx_timestamp {
__s64 tv_sec;
__s32 tv_nsec;
__s32 __reserved;
};
struct statx {
__u32 stx_mask;
__u32 stx_blksize;
__u64 stx_attributes;
__u32 stx_nlink;
__u32 stx_uid;
__u32 stx_gid;
__u16 stx_mode;
__u16 __spare0[1];
__u64 stx_ino;
__u64 stx_size;
__u64 stx_blocks;
__u64 __spare1[1];
struct statx_timestamp stx_atime;
struct statx_timestamp stx_btime;
struct statx_timestamp stx_ctime;
struct statx_timestamp stx_mtime;
__u32 stx_rdev_major;
__u32 stx_rdev_minor;
__u32 stx_dev_major;
__u32 stx_dev_minor;
__u64 __spare2[14];
};
The defined bits in request_mask and stx_mask are:
STATX_TYPE Want/got stx_mode & S_IFMT
STATX_MODE Want/got stx_mode & ~S_IFMT
STATX_NLINK Want/got stx_nlink
STATX_UID Want/got stx_uid
STATX_GID Want/got stx_gid
STATX_ATIME Want/got stx_atime{,_ns}
STATX_MTIME Want/got stx_mtime{,_ns}
STATX_CTIME Want/got stx_ctime{,_ns}
STATX_INO Want/got stx_ino
STATX_SIZE Want/got stx_size
STATX_BLOCKS Want/got stx_blocks
STATX_BASIC_STATS [The stuff in the normal stat struct]
STATX_BTIME Want/got stx_btime{,_ns}
STATX_ALL [All currently available stuff]
stx_btime is the file creation time, stx_mask is a bitmask indicating the
data provided and __spare*[] are where as-yet undefined fields can be
placed.
Time fields are structures with separate seconds and nanoseconds fields
plus a reserved field in case we want to add even finer resolution. Note
that times will be negative if before 1970; in such a case, the nanosecond
fields will also be negative if not zero.
The bits defined in the stx_attributes field convey information about a
file, how it is accessed, where it is and what it does. The following
attributes map to FS_*_FL flags and are the same numerical value:
STATX_ATTR_COMPRESSED File is compressed by the fs
STATX_ATTR_IMMUTABLE File is marked immutable
STATX_ATTR_APPEND File is append-only
STATX_ATTR_NODUMP File is not to be dumped
STATX_ATTR_ENCRYPTED File requires key to decrypt in fs
Within the kernel, the supported flags are listed by:
KSTAT_ATTR_FS_IOC_FLAGS
[Are any other IOC flags of sufficient general interest to be exposed
through this interface?]
New flags include:
STATX_ATTR_AUTOMOUNT Object is an automount trigger
These are for the use of GUI tools that might want to mark files specially,
depending on what they are.
Fields in struct statx come in a number of classes:
(0) stx_dev_*, stx_blksize.
These are local system information and are always available.
(1) stx_mode, stx_nlink, stx_uid, stx_gid, stx_[amc]time, stx_ino,
stx_size, stx_blocks.
These will be returned whether the caller asks for them or not. The
corresponding bits in stx_mask will be set to indicate whether they
actually have valid values.
If the caller didn't ask for them, then they may be approximated. For
example, NFS won't waste any time updating them from the server,
unless as a byproduct of updating something requested.
If the values don't actually exist for the underlying object (such as
UID or GID on a DOS file), then the bit won't be set in the stx_mask,
even if the caller asked for the value. In such a case, the returned
value will be a fabrication.
Note that there are instances where the type might not be valid, for
instance Windows reparse points.
(2) stx_rdev_*.
This will be set only if stx_mode indicates we're looking at a
blockdev or a chardev, otherwise will be 0.
(3) stx_btime.
Similar to (1), except this will be set to 0 if it doesn't exist.
=======
TESTING
=======
The following test program can be used to test the statx system call:
samples/statx/test-statx.c
Just compile and run, passing it paths to the files you want to examine.
The file is built automatically if CONFIG_SAMPLES is enabled.
Here's some example output. Firstly, an NFS directory that crosses to
another FSID. Note that the AUTOMOUNT attribute is set because transiting
this directory will cause d_automount to be invoked by the VFS.
[root@andromeda ~]# /tmp/test-statx -A /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:26 Inode: 1703937 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------)
Secondly, the result of automounting on that directory.
[root@andromeda ~]# /tmp/test-statx /warthog/data
statx(/warthog/data) = 0
results=7ff
Size: 4096 Blocks: 8 IO Block: 1048576 directory
Device: 00:27 Inode: 2 Links: 125
Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041
Access: 2016-11-24 09:02:12.219699527+0000
Modify: 2016-11-17 10:44:36.225653653+0000
Change: 2016-11-17 10:44:36.225653653+0000
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-02-01 00:46:22 +08:00
|
|
|
383 i386 statx sys_statx
|
2017-03-20 16:16:24 +08:00
|
|
|
384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
|