mirror of https://gitee.com/openkylin/linux.git
fs/file.c: don't acquire files->file_lock in fd_install()
Mateusz Guzik reported: Currently obtaining a new file descriptor results in locking fdtable twice - once in order to reserve a slot and second time to fill it. Holding the spinlock in __fd_install() is needed in case a resize is done, or to prevent a resize. Mateusz provided an RFC patch and a micro benchmark: http://people.redhat.com/~mguzik/pipebench.c A resize is an unlikely operation in a process lifetime, as table size is at least doubled at every resize. We can use RCU instead of the spinlock. __fd_install() must wait if a resize is in progress. The resize must block new __fd_install() callers from starting, and wait until ongoing installs are finished (synchronize_sched()). A resize should be attempted by a single thread to avoid wasting resources. The rcu_sched variant is used, as __fd_install() and expand_fdtable() run from process context. It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R) CPU E5-2696 v2 @ 2.50GHz Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Mateusz Guzik <mguzik@redhat.com> Acked-by: Mateusz Guzik <mguzik@redhat.com> Tested-by: Mateusz Guzik <mguzik@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
1af95de6f0
commit
8a81252b77
|
@ -500,3 +500,7 @@ in your dentry operations instead.
|
||||||
dentry, it does not get nameidata at all and it gets called only when cookie
|
dentry, it does not get nameidata at all and it gets called only when cookie
|
||||||
is non-NULL. Note that link body isn't available anymore, so if you need it,
|
is non-NULL. Note that link body isn't available anymore, so if you need it,
|
||||||
store it as cookie.
|
store it as cookie.
|
||||||
|
--
|
||||||
|
[mandatory]
|
||||||
|
__fd_install() & fd_install() can now sleep. Callers should not
|
||||||
|
hold a spinlock or other resources that do not allow a schedule.
|
||||||
|
|
67
fs/file.c
67
fs/file.c
|
@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr)
|
||||||
|
|
||||||
spin_unlock(&files->file_lock);
|
spin_unlock(&files->file_lock);
|
||||||
new_fdt = alloc_fdtable(nr);
|
new_fdt = alloc_fdtable(nr);
|
||||||
|
|
||||||
|
/* make sure all __fd_install() have seen resize_in_progress
|
||||||
|
* or have finished their rcu_read_lock_sched() section.
|
||||||
|
*/
|
||||||
|
if (atomic_read(&files->count) > 1)
|
||||||
|
synchronize_sched();
|
||||||
|
|
||||||
spin_lock(&files->file_lock);
|
spin_lock(&files->file_lock);
|
||||||
if (!new_fdt)
|
if (!new_fdt)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr)
|
||||||
__free_fdtable(new_fdt);
|
__free_fdtable(new_fdt);
|
||||||
return -EMFILE;
|
return -EMFILE;
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
* Check again since another task may have expanded the fd table while
|
|
||||||
* we dropped the lock
|
|
||||||
*/
|
|
||||||
cur_fdt = files_fdtable(files);
|
cur_fdt = files_fdtable(files);
|
||||||
if (nr >= cur_fdt->max_fds) {
|
BUG_ON(nr < cur_fdt->max_fds);
|
||||||
/* Continue as planned */
|
copy_fdtable(new_fdt, cur_fdt);
|
||||||
copy_fdtable(new_fdt, cur_fdt);
|
rcu_assign_pointer(files->fdt, new_fdt);
|
||||||
rcu_assign_pointer(files->fdt, new_fdt);
|
if (cur_fdt != &files->fdtab)
|
||||||
if (cur_fdt != &files->fdtab)
|
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
|
||||||
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
|
/* coupled with smp_rmb() in __fd_install() */
|
||||||
} else {
|
smp_wmb();
|
||||||
/* Somebody else expanded, so undo our attempt */
|
|
||||||
__free_fdtable(new_fdt);
|
|
||||||
}
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr)
|
||||||
* The files->file_lock should be held on entry, and will be held on exit.
|
* The files->file_lock should be held on entry, and will be held on exit.
|
||||||
*/
|
*/
|
||||||
static int expand_files(struct files_struct *files, int nr)
|
static int expand_files(struct files_struct *files, int nr)
|
||||||
|
__releases(files->file_lock)
|
||||||
|
__acquires(files->file_lock)
|
||||||
{
|
{
|
||||||
struct fdtable *fdt;
|
struct fdtable *fdt;
|
||||||
|
int expanded = 0;
|
||||||
|
|
||||||
|
repeat:
|
||||||
fdt = files_fdtable(files);
|
fdt = files_fdtable(files);
|
||||||
|
|
||||||
/* Do we need to expand? */
|
/* Do we need to expand? */
|
||||||
if (nr < fdt->max_fds)
|
if (nr < fdt->max_fds)
|
||||||
return 0;
|
return expanded;
|
||||||
|
|
||||||
/* Can we expand? */
|
/* Can we expand? */
|
||||||
if (nr >= sysctl_nr_open)
|
if (nr >= sysctl_nr_open)
|
||||||
return -EMFILE;
|
return -EMFILE;
|
||||||
|
|
||||||
|
if (unlikely(files->resize_in_progress)) {
|
||||||
|
spin_unlock(&files->file_lock);
|
||||||
|
expanded = 1;
|
||||||
|
wait_event(files->resize_wait, !files->resize_in_progress);
|
||||||
|
spin_lock(&files->file_lock);
|
||||||
|
goto repeat;
|
||||||
|
}
|
||||||
|
|
||||||
/* All good, so we try */
|
/* All good, so we try */
|
||||||
return expand_fdtable(files, nr);
|
files->resize_in_progress = true;
|
||||||
|
expanded = expand_fdtable(files, nr);
|
||||||
|
files->resize_in_progress = false;
|
||||||
|
|
||||||
|
wake_up_all(&files->resize_wait);
|
||||||
|
return expanded;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
|
static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
|
||||||
|
@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
||||||
atomic_set(&newf->count, 1);
|
atomic_set(&newf->count, 1);
|
||||||
|
|
||||||
spin_lock_init(&newf->file_lock);
|
spin_lock_init(&newf->file_lock);
|
||||||
|
newf->resize_in_progress = false;
|
||||||
|
init_waitqueue_head(&newf->resize_wait);
|
||||||
newf->next_fd = 0;
|
newf->next_fd = 0;
|
||||||
new_fdt = &newf->fdtab;
|
new_fdt = &newf->fdtab;
|
||||||
new_fdt->max_fds = NR_OPEN_DEFAULT;
|
new_fdt->max_fds = NR_OPEN_DEFAULT;
|
||||||
|
@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd,
|
||||||
struct file *file)
|
struct file *file)
|
||||||
{
|
{
|
||||||
struct fdtable *fdt;
|
struct fdtable *fdt;
|
||||||
spin_lock(&files->file_lock);
|
|
||||||
fdt = files_fdtable(files);
|
might_sleep();
|
||||||
|
rcu_read_lock_sched();
|
||||||
|
|
||||||
|
while (unlikely(files->resize_in_progress)) {
|
||||||
|
rcu_read_unlock_sched();
|
||||||
|
wait_event(files->resize_wait, !files->resize_in_progress);
|
||||||
|
rcu_read_lock_sched();
|
||||||
|
}
|
||||||
|
/* coupled with smp_wmb() in expand_fdtable() */
|
||||||
|
smp_rmb();
|
||||||
|
fdt = rcu_dereference_sched(files->fdt);
|
||||||
BUG_ON(fdt->fd[fd] != NULL);
|
BUG_ON(fdt->fd[fd] != NULL);
|
||||||
rcu_assign_pointer(fdt->fd[fd], file);
|
rcu_assign_pointer(fdt->fd[fd], file);
|
||||||
spin_unlock(&files->file_lock);
|
rcu_read_unlock_sched();
|
||||||
}
|
}
|
||||||
|
|
||||||
void fd_install(unsigned int fd, struct file *file)
|
void fd_install(unsigned int fd, struct file *file)
|
||||||
|
|
|
@ -47,6 +47,9 @@ struct files_struct {
|
||||||
* read mostly part
|
* read mostly part
|
||||||
*/
|
*/
|
||||||
atomic_t count;
|
atomic_t count;
|
||||||
|
bool resize_in_progress;
|
||||||
|
wait_queue_head_t resize_wait;
|
||||||
|
|
||||||
struct fdtable __rcu *fdt;
|
struct fdtable __rcu *fdt;
|
||||||
struct fdtable fdtab;
|
struct fdtable fdtab;
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue