Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs fixes from Al Viro.

Clean up file table accesses (get rid of fget_light() in favor of the
fdget() interface) and add proper file position locking.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  get rid of fget_light()
  sockfd_lookup_light(): switch to fdget^W^Waway from fget_light
  vfs: atomic f_pos accesses as per POSIX
  ocfs2 syncs the wrong range...
This commit is contained in:
Linus Torvalds 2014-03-10 12:57:26 -07:00
commit e6a4b6f5ea
9 changed files with 107 additions and 52 deletions

View File

@ -683,35 +683,65 @@ EXPORT_SYMBOL(fget_raw);
* The fput_needed flag returned by fget_light should be passed to the * The fput_needed flag returned by fget_light should be passed to the
* corresponding fput_light. * corresponding fput_light.
*/ */
struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed) static unsigned long __fget_light(unsigned int fd, fmode_t mask)
{ {
struct files_struct *files = current->files; struct files_struct *files = current->files;
struct file *file; struct file *file;
*fput_needed = 0;
if (atomic_read(&files->count) == 1) { if (atomic_read(&files->count) == 1) {
file = __fcheck_files(files, fd); file = __fcheck_files(files, fd);
if (file && (file->f_mode & mask)) if (!file || unlikely(file->f_mode & mask))
file = NULL; return 0;
return (unsigned long)file;
} else { } else {
file = __fget(fd, mask); file = __fget(fd, mask);
if (file) if (!file)
*fput_needed = 1; return 0;
return FDPUT_FPUT | (unsigned long)file;
} }
return file;
} }
struct file *fget_light(unsigned int fd, int *fput_needed) unsigned long __fdget(unsigned int fd)
{ {
return __fget_light(fd, FMODE_PATH, fput_needed); return __fget_light(fd, FMODE_PATH);
} }
EXPORT_SYMBOL(fget_light); EXPORT_SYMBOL(__fdget);
struct file *fget_raw_light(unsigned int fd, int *fput_needed) unsigned long __fdget_raw(unsigned int fd)
{ {
return __fget_light(fd, 0, fput_needed); return __fget_light(fd, 0);
} }
unsigned long __fdget_pos(unsigned int fd)
{
struct files_struct *files = current->files;
struct file *file;
unsigned long v;
if (atomic_read(&files->count) == 1) {
file = __fcheck_files(files, fd);
v = 0;
} else {
file = __fget(fd, 0);
v = FDPUT_FPUT;
}
if (!file)
return 0;
if (file->f_mode & FMODE_ATOMIC_POS) {
if (file_count(file) > 1) {
v |= FDPUT_POS_UNLOCK;
mutex_lock(&file->f_pos_lock);
}
}
return v | (unsigned long)file;
}
/*
* We only lock f_pos if we have threads or if the file might be
* shared with another process. In both cases we'll have an elevated
* file count (done either by fdget() or by fork()).
*/
void set_close_on_exec(unsigned int fd, int flag) void set_close_on_exec(unsigned int fd, int flag)
{ {
struct files_struct *files = current->files; struct files_struct *files = current->files;

View File

@ -135,6 +135,7 @@ struct file *get_empty_filp(void)
atomic_long_set(&f->f_count, 1); atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock); rwlock_init(&f->f_owner.lock);
spin_lock_init(&f->f_lock); spin_lock_init(&f->f_lock);
mutex_init(&f->f_pos_lock);
eventpoll_init_file(f); eventpoll_init_file(f);
/* f->f_version: 0 */ /* f->f_version: 0 */
return f; return f;

View File

@ -1884,7 +1884,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = f.file->f_path; nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) { if (flags & LOOKUP_RCU) {
if (f.need_put) if (f.flags & FDPUT_FPUT)
*fp = f.file; *fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock(); rcu_read_lock();

View File

@ -2393,8 +2393,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) { ((file->f_flags & O_DIRECT) && !direct_io)) {
ret = filemap_fdatawrite_range(file->f_mapping, pos, ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
pos + count - 1); *ppos + count - 1);
if (ret < 0) if (ret < 0)
written = ret; written = ret;
@ -2407,8 +2407,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
} }
if (!ret) if (!ret)
ret = filemap_fdatawait_range(file->f_mapping, pos, ret = filemap_fdatawait_range(file->f_mapping, *ppos,
pos + count - 1); *ppos + count - 1);
} }
/* /*

View File

@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f,
return 0; return 0;
} }
/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
if (S_ISREG(inode->i_mode))
f->f_mode |= FMODE_ATOMIC_POS;
f->f_op = fops_get(inode->i_fop); f->f_op = fops_get(inode->i_fop);
if (unlikely(WARN_ON(!f->f_op))) { if (unlikely(WARN_ON(!f->f_op))) {
error = -ENODEV; error = -ENODEV;

View File

@ -264,10 +264,22 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
} }
EXPORT_SYMBOL(vfs_llseek); EXPORT_SYMBOL(vfs_llseek);
static inline struct fd fdget_pos(int fd)
{
return __to_fd(__fdget_pos(fd));
}
static inline void fdput_pos(struct fd f)
{
if (f.flags & FDPUT_POS_UNLOCK)
mutex_unlock(&f.file->f_pos_lock);
fdput(f);
}
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{ {
off_t retval; off_t retval;
struct fd f = fdget(fd); struct fd f = fdget_pos(fd);
if (!f.file) if (!f.file)
return -EBADF; return -EBADF;
@ -278,7 +290,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
if (res != (loff_t)retval) if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
} }
fdput(f); fdput_pos(f);
return retval; return retval;
} }
@ -498,7 +510,7 @@ static inline void file_pos_write(struct file *file, loff_t pos)
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{ {
struct fd f = fdget(fd); struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF; ssize_t ret = -EBADF;
if (f.file) { if (f.file) {
@ -506,7 +518,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
ret = vfs_read(f.file, buf, count, &pos); ret = vfs_read(f.file, buf, count, &pos);
if (ret >= 0) if (ret >= 0)
file_pos_write(f.file, pos); file_pos_write(f.file, pos);
fdput(f); fdput_pos(f);
} }
return ret; return ret;
} }
@ -514,7 +526,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count) size_t, count)
{ {
struct fd f = fdget(fd); struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF; ssize_t ret = -EBADF;
if (f.file) { if (f.file) {
@ -522,7 +534,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
ret = vfs_write(f.file, buf, count, &pos); ret = vfs_write(f.file, buf, count, &pos);
if (ret >= 0) if (ret >= 0)
file_pos_write(f.file, pos); file_pos_write(f.file, pos);
fdput(f); fdput_pos(f);
} }
return ret; return ret;
@ -797,7 +809,7 @@ EXPORT_SYMBOL(vfs_writev);
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen) unsigned long, vlen)
{ {
struct fd f = fdget(fd); struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF; ssize_t ret = -EBADF;
if (f.file) { if (f.file) {
@ -805,7 +817,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_readv(f.file, vec, vlen, &pos); ret = vfs_readv(f.file, vec, vlen, &pos);
if (ret >= 0) if (ret >= 0)
file_pos_write(f.file, pos); file_pos_write(f.file, pos);
fdput(f); fdput_pos(f);
} }
if (ret > 0) if (ret > 0)
@ -817,7 +829,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen) unsigned long, vlen)
{ {
struct fd f = fdget(fd); struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF; ssize_t ret = -EBADF;
if (f.file) { if (f.file) {
@ -825,7 +837,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_writev(f.file, vec, vlen, &pos); ret = vfs_writev(f.file, vec, vlen, &pos);
if (ret >= 0) if (ret >= 0)
file_pos_write(f.file, pos); file_pos_write(f.file, pos);
fdput(f); fdput_pos(f);
} }
if (ret > 0) if (ret > 0)
@ -968,7 +980,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec, const struct compat_iovec __user *,vec,
compat_ulong_t, vlen) compat_ulong_t, vlen)
{ {
struct fd f = fdget(fd); struct fd f = fdget_pos(fd);
ssize_t ret; ssize_t ret;
loff_t pos; loff_t pos;
@ -978,7 +990,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
ret = compat_readv(f.file, vec, vlen, &pos); ret = compat_readv(f.file, vec, vlen, &pos);
if (ret >= 0) if (ret >= 0)
f.file->f_pos = pos; f.file->f_pos = pos;
fdput(f); fdput_pos(f);
return ret; return ret;
} }
@ -1035,7 +1047,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec, const struct compat_iovec __user *, vec,
compat_ulong_t, vlen) compat_ulong_t, vlen)
{ {
struct fd f = fdget(fd); struct fd f = fdget_pos(fd);
ssize_t ret; ssize_t ret;
loff_t pos; loff_t pos;
@ -1045,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
ret = compat_writev(f.file, vec, vlen, &pos); ret = compat_writev(f.file, vec, vlen, &pos);
if (ret >= 0) if (ret >= 0)
f.file->f_pos = pos; f.file->f_pos = pos;
fdput(f); fdput_pos(f);
return ret; return ret;
} }

View File

@ -28,33 +28,36 @@ static inline void fput_light(struct file *file, int fput_needed)
struct fd { struct fd {
struct file *file; struct file *file;
int need_put; unsigned int flags;
}; };
#define FDPUT_FPUT 1
#define FDPUT_POS_UNLOCK 2
static inline void fdput(struct fd fd) static inline void fdput(struct fd fd)
{ {
if (fd.need_put) if (fd.flags & FDPUT_FPUT)
fput(fd.file); fput(fd.file);
} }
extern struct file *fget(unsigned int fd); extern struct file *fget(unsigned int fd);
extern struct file *fget_light(unsigned int fd, int *fput_needed); extern struct file *fget_raw(unsigned int fd);
extern unsigned long __fdget(unsigned int fd);
extern unsigned long __fdget_raw(unsigned int fd);
extern unsigned long __fdget_pos(unsigned int fd);
static inline struct fd __to_fd(unsigned long v)
{
return (struct fd){(struct file *)(v & ~3),v & 3};
}
static inline struct fd fdget(unsigned int fd) static inline struct fd fdget(unsigned int fd)
{ {
int b; return __to_fd(__fdget(fd));
struct file *f = fget_light(fd, &b);
return (struct fd){f,b};
} }
extern struct file *fget_raw(unsigned int fd);
extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
static inline struct fd fdget_raw(unsigned int fd) static inline struct fd fdget_raw(unsigned int fd)
{ {
int b; return __to_fd(__fdget_raw(fd));
struct file *f = fget_raw_light(fd, &b);
return (struct fd){f,b};
} }
extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);

View File

@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File is opened with O_PATH; almost nothing can be done with it */ /* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)0x4000) #define FMODE_PATH ((__force fmode_t)0x4000)
/* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
/* File was opened by fanotify and shouldn't generate fanotify events */ /* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) #define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
@ -780,13 +783,14 @@ struct file {
const struct file_operations *f_op; const struct file_operations *f_op;
/* /*
* Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR. * Protects f_ep_links, f_flags.
* Must not be taken from IRQ context. * Must not be taken from IRQ context.
*/ */
spinlock_t f_lock; spinlock_t f_lock;
atomic_long_t f_count; atomic_long_t f_count;
unsigned int f_flags; unsigned int f_flags;
fmode_t f_mode; fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos; loff_t f_pos;
struct fown_struct f_owner; struct fown_struct f_owner;
const struct cred *f_cred; const struct cred *f_cred;
@ -808,7 +812,7 @@ struct file {
#ifdef CONFIG_DEBUG_WRITECOUNT #ifdef CONFIG_DEBUG_WRITECOUNT
unsigned long f_mnt_write_state; unsigned long f_mnt_write_state;
#endif #endif
}; } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
struct file_handle { struct file_handle {
__u32 handle_bytes; __u32 handle_bytes;

View File

@ -450,16 +450,17 @@ EXPORT_SYMBOL(sockfd_lookup);
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{ {
struct file *file; struct fd f = fdget(fd);
struct socket *sock; struct socket *sock;
*err = -EBADF; *err = -EBADF;
file = fget_light(fd, fput_needed); if (f.file) {
if (file) { sock = sock_from_file(f.file, err);
sock = sock_from_file(file, err); if (likely(sock)) {
if (sock) *fput_needed = f.flags;
return sock; return sock;
fput_light(file, *fput_needed); }
fdput(f);
} }
return NULL; return NULL;
} }