linux/fs/autofs4/inode.c

350 lines
8.2 KiB
C
Raw Normal View History

/* -*- c -*- --------------------------------------------------------------- *
*
* linux/fs/autofs/inode.c
*
* Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
* Copyright 2005-2006 Ian Kent <raven@themaw.net>
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
*
* ------------------------------------------------------------------------- */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
#include <linux/bitops.h>
#include <linux/magic.h>
#include "autofs_i.h"
#include <linux/module.h>
struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
{
struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL);
if (ino) {
autofs4: use look aside list for lookups A while ago a patch to resolve a deadlock during directory creation was merged. This delayed the hashing of lookup dentrys until the ->mkdir() (or ->symlink()) operation completed to ensure we always went through ->lookup() instead of also having processes go through ->revalidate() so our VFS locking remained consistent. Now we are seeing a couple of side affects of that change in situations with heavy mount activity. Two cases have been identified: 1) When a mount request is triggered, due to the delayed hashing, the directory created by user space for the mount point doesn't have the DCACHE_AUTOFS_PENDING flag set. In the case of an autofs multi-mount where a tree of mount point directories are created this can lead to the path walk continuing rather than the dentry being sent to the wait queue to wait for request completion. This is because, if the pending flag isn't set, the criteria for deciding this is a mount in progress fails to hold, namely that the dentry is not a mount point and has no subdirectories. 2) A mount request dentry is initially created negative and unhashed. It remains this way until the ->mkdir() callback completes. Since it is unhashed a fresh dentry is used when the user space mount request creates the mount point directory. This leaves the original dentry negative and unhashed. But revalidate has no way to tell the VFS that the dentry has changed, other than to force another ->lookup() by returning false, which is at best wastefull and at worst not possible. This results in an -ENOENT return from the original path walk when in fact the mount succeeded. To resolve this we need to ensure that the same dentry is used in all calls to ->lookup() during the course of a mount request. This patch achieves that by adding the initial dentry to a look aside list and removes it at ->mkdir() or ->symlink() completion (or when the dentry is released), since these are the only create operations autofs4 supports. Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:30:12 +08:00
INIT_LIST_HEAD(&ino->active);
INIT_LIST_HEAD(&ino->expiring);
ino->last_used = jiffies;
ino->sbi = sbi;
autofs4: use look aside list for lookups A while ago a patch to resolve a deadlock during directory creation was merged. This delayed the hashing of lookup dentrys until the ->mkdir() (or ->symlink()) operation completed to ensure we always went through ->lookup() instead of also having processes go through ->revalidate() so our VFS locking remained consistent. Now we are seeing a couple of side affects of that change in situations with heavy mount activity. Two cases have been identified: 1) When a mount request is triggered, due to the delayed hashing, the directory created by user space for the mount point doesn't have the DCACHE_AUTOFS_PENDING flag set. In the case of an autofs multi-mount where a tree of mount point directories are created this can lead to the path walk continuing rather than the dentry being sent to the wait queue to wait for request completion. This is because, if the pending flag isn't set, the criteria for deciding this is a mount in progress fails to hold, namely that the dentry is not a mount point and has no subdirectories. 2) A mount request dentry is initially created negative and unhashed. It remains this way until the ->mkdir() callback completes. Since it is unhashed a fresh dentry is used when the user space mount request creates the mount point directory. This leaves the original dentry negative and unhashed. But revalidate has no way to tell the VFS that the dentry has changed, other than to force another ->lookup() by returning false, which is at best wastefull and at worst not possible. This results in an -ENOENT return from the original path walk when in fact the mount succeeded. To resolve this we need to ensure that the same dentry is used in all calls to ->lookup() during the course of a mount request. This patch achieves that by adding the initial dentry to a look aside list and removes it at ->mkdir() or ->symlink() completion (or when the dentry is released), since these are the only create operations autofs4 supports. Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:30:12 +08:00
}
return ino;
}
void autofs4_clean_ino(struct autofs_info *ino)
{
ino->uid = GLOBAL_ROOT_UID;
ino->gid = GLOBAL_ROOT_GID;
ino->last_used = jiffies;
}
void autofs4_free_ino(struct autofs_info *ino)
{
kfree(ino);
}
void autofs4_kill_sb(struct super_block *sb)
{
struct autofs_sb_info *sbi = autofs4_sbi(sb);
/*
* In the event of a failure in get_sb_nodev the superblock
* info is not present so nothing else has been setup, so
[PATCH] autofs: fix error code path in autofs_fill_sb() When kernel is compiled with old version of autofs (CONFIG_AUTOFS_FS), and new (observed at least with 5.x.x) automount deamon is started, kernel correctly reports incompatible version of kernel and userland daemon, but then screws things up instead of correct handling of the error: autofs: kernel does not match daemon version ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- automount/4199 is trying to release lock (&type->s_umount_key) at: [<c0163b9e>] get_sb_nodev+0x76/0xa4 but there are no more locks to release! other info that might help us debug this: no locks held by automount/4199. stack backtrace: [<c0103b15>] dump_trace+0x68/0x1b2 [<c0103c77>] show_trace_log_lvl+0x18/0x2c [<c01041db>] show_trace+0xf/0x11 [<c010424d>] dump_stack+0x12/0x14 [<c012e02c>] print_unlock_inbalance_bug+0xe7/0xf3 [<c012fd4f>] lock_release+0x8d/0x164 [<c012b452>] up_write+0x14/0x27 [<c0163b9e>] get_sb_nodev+0x76/0xa4 [<c0163689>] vfs_kern_mount+0x83/0xf6 [<c016373e>] do_kern_mount+0x2d/0x3e [<c017513f>] do_mount+0x607/0x67a [<c0175224>] sys_mount+0x72/0xa4 [<c0102b96>] sysenter_past_esp+0x5f/0x99 DWARF2 unwinder stuck at sysenter_past_esp+0x5f/0x99 Leftover inexact backtrace: ======================= and then deadlock comes. The problem: autofs_fill_super() returns EINVAL to get_sb_nodev(), but before that, it calls kill_anon_super() to destroy the superblock which won't be needed. This is however way too soon to call kill_anon_super(), because get_sb_nodev() has to perform its own cleanup of the superblock first (deactivate_super(), etc.). The correct time to call kill_anon_super() is in the autofs_kill_sb() callback, which is called by deactivate_super() at proper time, when the superblock is ready to be killed. I can see the same faulty codepath also in autofs4. This patch solves issues in both filesystems in a same way - it postpones the kill_anon_super() until the proper time is signalized by deactivate_super() calling the kill_sb() callback. [raven@themaw.net: update comment] Signed-off-by: Jiri Kosina <jkosina@suse.cz> Acked-by: Ian Kent <raven@themaw.net> Cc: <stable@kernel.org> Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07 12:39:38 +08:00
* just call kill_anon_super when we are called from
* deactivate_super.
*/
if (sbi) /* Free wait queues, close pipe */
autofs4_catatonic_mode(sbi);
DPRINTK("shutting down");
kill_litter_super(sb);
if (sbi)
kfree_rcu(sbi, rcu);
}
static int autofs4_show_options(struct seq_file *m, struct dentry *root)
{
struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
struct inode *root_inode = root->d_sb->s_root->d_inode;
if (!sbi)
return 0;
seq_printf(m, ",fd=%d", sbi->pipefd);
if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID))
seq_printf(m, ",uid=%u",
from_kuid_munged(&init_user_ns, root_inode->i_uid));
if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
seq_printf(m, ",gid=%u",
from_kgid_munged(&init_user_ns, root_inode->i_gid));
seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
seq_printf(m, ",minproto=%d", sbi->min_proto);
seq_printf(m, ",maxproto=%d", sbi->max_proto);
if (autofs_type_offset(sbi->type))
seq_printf(m, ",offset");
else if (autofs_type_direct(sbi->type))
seq_printf(m, ",direct");
else
seq_printf(m, ",indirect");
return 0;
}
static void autofs4_evict_inode(struct inode *inode)
{
clear_inode(inode);
kfree(inode->i_private);
}
static const struct super_operations autofs4_sops = {
.statfs = simple_statfs,
.show_options = autofs4_show_options,
.evict_inode = autofs4_evict_inode,
};
enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
Opt_indirect, Opt_direct, Opt_offset};
static const match_table_t tokens = {
{Opt_fd, "fd=%u"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_pgrp, "pgrp=%u"},
{Opt_minproto, "minproto=%u"},
{Opt_maxproto, "maxproto=%u"},
{Opt_indirect, "indirect"},
{Opt_direct, "direct"},
{Opt_offset, "offset"},
{Opt_err, NULL}
};
static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
*uid = current_uid();
*gid = current_gid();
*pgrp = task_pgrp_nr(current);
*minproto = AUTOFS_MIN_PROTO_VERSION;
*maxproto = AUTOFS_MAX_PROTO_VERSION;
*pipefd = -1;
if (!options)
return 1;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_fd:
if (match_int(args, pipefd))
return 1;
break;
case Opt_uid:
if (match_int(args, &option))
return 1;
*uid = make_kuid(current_user_ns(), option);
if (!uid_valid(*uid))
return 1;
break;
case Opt_gid:
if (match_int(args, &option))
return 1;
*gid = make_kgid(current_user_ns(), option);
if (!gid_valid(*gid))
return 1;
break;
case Opt_pgrp:
if (match_int(args, &option))
return 1;
*pgrp = option;
break;
case Opt_minproto:
if (match_int(args, &option))
return 1;
*minproto = option;
break;
case Opt_maxproto:
if (match_int(args, &option))
return 1;
*maxproto = option;
break;
case Opt_indirect:
set_autofs_type_indirect(type);
break;
case Opt_direct:
set_autofs_type_direct(type);
break;
case Opt_offset:
set_autofs_type_offset(type);
break;
default:
return 1;
}
}
return (*pipefd < 0);
}
int autofs4_fill_super(struct super_block *s, void *data, int silent)
{
struct inode * root_inode;
struct dentry * root;
struct file * pipe;
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
goto fail_unlock;
DPRINTK("starting up, sbi = %p",sbi);
s->s_fs_info = sbi;
sbi->magic = AUTOFS_SBI_MAGIC;
sbi->pipefd = -1;
sbi->pipe = NULL;
sbi->catatonic = 1;
sbi->exp_timeout = 0;
sbi->oz_pgrp = task_pgrp_nr(current);
sbi->sb = s;
sbi->version = 0;
sbi->sub_version = 0;
set_autofs_type_indirect(&sbi->type);
sbi->min_proto = 0;
sbi->max_proto = 0;
mutex_init(&sbi->wq_mutex);
mutex_init(&sbi->pipe_mutex);
spin_lock_init(&sbi->fs_lock);
sbi->queues = NULL;
spin_lock_init(&sbi->lookup_lock);
autofs4: use look aside list for lookups A while ago a patch to resolve a deadlock during directory creation was merged. This delayed the hashing of lookup dentrys until the ->mkdir() (or ->symlink()) operation completed to ensure we always went through ->lookup() instead of also having processes go through ->revalidate() so our VFS locking remained consistent. Now we are seeing a couple of side affects of that change in situations with heavy mount activity. Two cases have been identified: 1) When a mount request is triggered, due to the delayed hashing, the directory created by user space for the mount point doesn't have the DCACHE_AUTOFS_PENDING flag set. In the case of an autofs multi-mount where a tree of mount point directories are created this can lead to the path walk continuing rather than the dentry being sent to the wait queue to wait for request completion. This is because, if the pending flag isn't set, the criteria for deciding this is a mount in progress fails to hold, namely that the dentry is not a mount point and has no subdirectories. 2) A mount request dentry is initially created negative and unhashed. It remains this way until the ->mkdir() callback completes. Since it is unhashed a fresh dentry is used when the user space mount request creates the mount point directory. This leaves the original dentry negative and unhashed. But revalidate has no way to tell the VFS that the dentry has changed, other than to force another ->lookup() by returning false, which is at best wastefull and at worst not possible. This results in an -ENOENT return from the original path walk when in fact the mount succeeded. To resolve this we need to ensure that the same dentry is used in all calls to ->lookup() during the course of a mount request. This patch achieves that by adding the initial dentry to a look aside list and removes it at ->mkdir() or ->symlink() completion (or when the dentry is released), since these are the only create operations autofs4 supports. Signed-off-by: Ian Kent <raven@themaw.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 12:30:12 +08:00
INIT_LIST_HEAD(&sbi->active_list);
INIT_LIST_HEAD(&sbi->expiring_list);
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = AUTOFS_SUPER_MAGIC;
s->s_op = &autofs4_sops;
s->s_d_op = &autofs4_dentry_operations;
s->s_time_gran = 1;
/*
* Get the root inode and dentry, but defer checking for errors.
*/
ino = autofs4_new_ino(sbi);
if (!ino)
goto fail_free;
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
root = d_make_root(root_inode);
if (!root)
goto fail_ino;
pipe = NULL;
root->d_fsdata = ino;
/* Can this call block? */
if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
&sbi->oz_pgrp, &sbi->type, &sbi->min_proto,
&sbi->max_proto)) {
printk("autofs: called with bogus options\n");
goto fail_dput;
}
if (autofs_type_trigger(sbi->type))
__managed_dentry_set_managed(root);
root_inode->i_fop = &autofs4_root_operations;
root_inode->i_op = &autofs4_dir_inode_operations;
/* Couldn't this be tested earlier? */
if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
printk("autofs: kernel does not match daemon version "
"daemon (%d, %d) kernel (%d, %d)\n",
sbi->min_proto, sbi->max_proto,
AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
goto fail_dput;
}
/* Establish highest kernel protocol version */
if (sbi->max_proto > AUTOFS_MAX_PROTO_VERSION)
sbi->version = AUTOFS_MAX_PROTO_VERSION;
else
sbi->version = sbi->max_proto;
sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
pipe = fget(pipefd);
if (!pipe) {
printk("autofs: could not open pipe file descriptor\n");
goto fail_dput;
}
autofs: make the autofsv5 packet file descriptor use a packetized pipe The autofs packet size has had a very unfortunate size problem on x86: because the alignment of 'u64' differs in 32-bit and 64-bit modes, and because the packet data was not 8-byte aligned, the size of the autofsv5 packet structure differed between 32-bit and 64-bit modes despite looking otherwise identical (300 vs 304 bytes respectively). We first fixed that up by making the 64-bit compat mode know about this problem in commit a32744d4abae ("autofs: work around unhappy compat problem on x86-64"), and that made a 32-bit 'systemd' work happily on a 64-bit kernel because everything then worked the same way as on a 32-bit kernel. But it turned out that 'automount' had actually known and worked around this problem in user space, so fixing the kernel to do the proper 32-bit compatibility handling actually *broke* 32-bit automount on a 64-bit kernel, because it knew that the packet sizes were wrong and expected those incorrect sizes. As a result, we ended up reverting that compatibility mode fix, and thus breaking systemd again, in commit fcbf94b9dedd. With both automount and systemd doing a single read() system call, and verifying that they get *exactly* the size they expect but using different sizes, it seemed that fixing one of them inevitably seemed to break the other. At one point, a patch I seriously considered applying from Michael Tokarev did a "strcmp()" to see if it was automount that was doing the operation. Ugly, ugly. However, a prettier solution exists now thanks to the packetized pipe mode. By marking the communication pipe as being packetized (by simply setting the O_DIRECT flag), we can always just write the bigger packet size, and if user-space does a smaller read, it will just get that partial end result and the extra alignment padding will simply be thrown away. This makes both automount and systemd happy, since they now get the size they asked for, and the kernel side of autofs simply no longer needs to care - it could pad out the packet arbitrarily. Of course, if there is some *other* user of autofs (please, please, please tell me it ain't so - and we haven't heard of any) that tries to read the packets with multiple writes, that other user will now be broken - the whole point of the packetized mode is that one system call gets exactly one packet, and you cannot read a packet in pieces. Tested-by: Michael Tokarev <mjt@tls.msk.ru> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: David Miller <davem@davemloft.net> Cc: Ian Kent <raven@themaw.net> Cc: Thomas Meyer <thomas@m3y3r.de> Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-04-30 04:30:08 +08:00
if (autofs_prepare_pipe(pipe) < 0)
goto fail_fput;
sbi->pipe = pipe;
sbi->pipefd = pipefd;
sbi->catatonic = 0;
/*
* Success! Install the root dentry now to indicate completion.
*/
s->s_root = root;
return 0;
/*
* Failure ... clean up.
*/
fail_fput:
printk("autofs: pipe file descriptor does not contain proper ops\n");
fput(pipe);
/* fall through */
fail_dput:
dput(root);
goto fail_free;
fail_ino:
kfree(ino);
fail_free:
kfree(sbi);
s->s_fs_info = NULL;
fail_unlock:
return -EINVAL;
}
struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
{
struct inode *inode = new_inode(sb);
if (inode == NULL)
return NULL;
inode->i_mode = mode;
if (sb->s_root) {
inode->i_uid = sb->s_root->d_inode->i_uid;
inode->i_gid = sb->s_root->d_inode->i_gid;
}
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = get_next_ino();
if (S_ISDIR(mode)) {
set_nlink(inode, 2);
inode->i_op = &autofs4_dir_inode_operations;
inode->i_fop = &autofs4_dir_operations;
} else if (S_ISLNK(mode)) {
inode->i_op = &autofs4_symlink_inode_operations;
}
return inode;
}