ufs: remove the BKL

This introduces a new per-superblock mutex in UFS to replace
the big kernel lock. I have been careful to avoid nested
calls to lock_ufs and to get the lock order right with
respect to other mutexes, in particular lock_super.

I did not make any attempt to prove that the big kernel
lock is not needed in a particular place in the code,
which is very possible.

The mutex has a significant performance impact, so it is only
used on SMP or PREEMPT configurations.

As Nick Piggin noticed, any allocation inside of the lock
may end up deadlocking when we get to ufs_getfrag_block
in the reclaim task, so we now use GFP_NOFS.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Nick Bowler <nbowler@elliptictech.com>
Cc: Evgeniy Dushistov <dushistov@mail.ru>
Cc: Nick Piggin <npiggin@gmail.com>
This commit is contained in:
Arnd Bergmann 2011-01-24 10:14:12 +01:00
parent 9a311b96c3
commit 788257d610
7 changed files with 83 additions and 108 deletions

View File

@ -1,7 +1,6 @@
config UFS_FS
tristate "UFS file system support (read only)"
depends on BLOCK
depends on BKL # probably fixable
help
BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
OpenBSD and NeXTstep) use a file system called UFS. Some System V

View File

@ -34,7 +34,6 @@
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
@ -43,7 +42,7 @@
#include "swab.h"
#include "util.h"
static u64 ufs_frag_map(struct inode *inode, sector_t frag);
static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
{
@ -82,7 +81,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
* the begining of the filesystem.
*/
static u64 ufs_frag_map(struct inode *inode, sector_t frag)
static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block *sb = inode->i_sb;
@ -107,7 +106,8 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag)
p = offsets;
lock_kernel();
if (needs_lock)
lock_ufs(sb);
if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
goto ufs2;
@ -152,7 +152,8 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag)
ret = temp + (u64) (frag & uspi->s_fpbmask);
out:
unlock_kernel();
if (needs_lock)
unlock_ufs(sb);
return ret;
}
@ -415,14 +416,16 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
{
struct super_block * sb = inode->i_sb;
struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi;
struct ufs_sb_info * sbi = UFS_SB(sb);
struct ufs_sb_private_info * uspi = sbi->s_uspi;
struct buffer_head * bh;
int ret, err, new;
unsigned long ptr,phys;
u64 phys64 = 0;
bool needs_lock = (sbi->mutex_owner != current);
if (!create) {
phys64 = ufs_frag_map(inode, fragment);
phys64 = ufs_frag_map(inode, fragment, needs_lock);
UFSD("phys64 = %llu\n", (unsigned long long)phys64);
if (phys64)
map_bh(bh_result, sb, phys64);
@ -436,7 +439,8 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
ret = 0;
bh = NULL;
lock_kernel();
if (needs_lock)
lock_ufs(sb);
UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
if (fragment >
@ -498,7 +502,9 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
set_buffer_new(bh_result);
map_bh(bh_result, sb, phys);
abort:
unlock_kernel();
if (needs_lock)
unlock_ufs(sb);
return err;
abort_too_big:
@ -506,48 +512,6 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
goto abort;
}
static struct buffer_head *ufs_getfrag(struct inode *inode,
unsigned int fragment,
int create, int *err)
{
struct buffer_head dummy;
int error;
dummy.b_state = 0;
dummy.b_blocknr = -1000;
error = ufs_getfrag_block(inode, fragment, &dummy, create);
*err = error;
if (!error && buffer_mapped(&dummy)) {
struct buffer_head *bh;
bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
if (buffer_new(&dummy)) {
memset(bh->b_data, 0, inode->i_sb->s_blocksize);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
}
return bh;
}
return NULL;
}
struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment,
int create, int * err)
{
struct buffer_head * bh;
UFSD("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment);
bh = ufs_getfrag (inode, fragment, create, err);
if (!bh || buffer_uptodate(bh))
return bh;
ll_rw_block (READ, 1, &bh);
wait_on_buffer (bh);
if (buffer_uptodate(bh))
return bh;
brelse (bh);
*err = -EIO;
return NULL;
}
static int ufs_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page,ufs_getfrag_block,wbc);
@ -900,9 +864,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int ret;
lock_kernel();
lock_ufs(inode->i_sb);
ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
unlock_kernel();
unlock_ufs(inode->i_sb);
return ret;
}
@ -922,22 +886,22 @@ void ufs_evict_inode(struct inode * inode)
if (want_delete) {
loff_t old_i_size;
/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
lock_kernel();
lock_ufs(inode->i_sb);
mark_inode_dirty(inode);
ufs_update_inode(inode, IS_SYNC(inode));
old_i_size = inode->i_size;
inode->i_size = 0;
if (inode->i_blocks && ufs_truncate(inode, old_i_size))
ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
unlock_kernel();
unlock_ufs(inode->i_sb);
}
invalidate_inode_buffers(inode);
end_writeback(inode);
if (want_delete) {
lock_kernel();
lock_ufs(inode->i_sb);
ufs_free_inode (inode);
unlock_kernel();
unlock_ufs(inode->i_sb);
}
}

View File

@ -29,7 +29,6 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
#include "ufs_fs.h"
#include "ufs.h"
@ -55,16 +54,16 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
if (dentry->d_name.len > UFS_MAXNAMLEN)
return ERR_PTR(-ENAMETOOLONG);
lock_kernel();
lock_ufs(dir->i_sb);
ino = ufs_inode_by_name(dir, &dentry->d_name);
if (ino) {
inode = ufs_iget(dir->i_sb, ino);
if (IS_ERR(inode)) {
unlock_kernel();
unlock_ufs(dir->i_sb);
return ERR_CAST(inode);
}
}
unlock_kernel();
unlock_ufs(dir->i_sb);
d_add(dentry, inode);
return NULL;
}
@ -93,9 +92,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
inode->i_fop = &ufs_file_operations;
inode->i_mapping->a_ops = &ufs_aops;
mark_inode_dirty(inode);
lock_kernel();
lock_ufs(dir->i_sb);
err = ufs_add_nondir(dentry, inode);
unlock_kernel();
unlock_ufs(dir->i_sb);
}
UFSD("END: err=%d\n", err);
return err;
@ -115,9 +114,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
init_special_inode(inode, mode, rdev);
ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev);
mark_inode_dirty(inode);
lock_kernel();
lock_ufs(dir->i_sb);
err = ufs_add_nondir(dentry, inode);
unlock_kernel();
unlock_ufs(dir->i_sb);
}
return err;
}
@ -133,7 +132,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
if (l > sb->s_blocksize)
goto out_notlocked;
lock_kernel();
lock_ufs(dir->i_sb);
inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
err = PTR_ERR(inode);
if (IS_ERR(inode))
@ -156,7 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
err = ufs_add_nondir(dentry, inode);
out:
unlock_kernel();
unlock_ufs(dir->i_sb);
out_notlocked:
return err;
@ -172,9 +171,9 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
struct inode *inode = old_dentry->d_inode;
int error;
lock_kernel();
lock_ufs(dir->i_sb);
if (inode->i_nlink >= UFS_LINK_MAX) {
unlock_kernel();
unlock_ufs(dir->i_sb);
return -EMLINK;
}
@ -183,7 +182,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
ihold(inode);
error = ufs_add_nondir(dentry, inode);
unlock_kernel();
unlock_ufs(dir->i_sb);
return error;
}
@ -195,7 +194,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
if (dir->i_nlink >= UFS_LINK_MAX)
goto out;
lock_kernel();
lock_ufs(dir->i_sb);
inode_inc_link_count(dir);
inode = ufs_new_inode(dir, S_IFDIR|mode);
@ -216,7 +215,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
err = ufs_add_link(dentry, inode);
if (err)
goto out_fail;
unlock_kernel();
unlock_ufs(dir->i_sb);
d_instantiate(dentry, inode);
out:
@ -228,7 +227,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
iput (inode);
out_dir:
inode_dec_link_count(dir);
unlock_kernel();
unlock_ufs(dir->i_sb);
goto out;
}
@ -259,7 +258,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
struct inode * inode = dentry->d_inode;
int err= -ENOTEMPTY;
lock_kernel();
lock_ufs(dir->i_sb);
if (ufs_empty_dir (inode)) {
err = ufs_unlink(dir, dentry);
if (!err) {
@ -268,7 +267,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
inode_dec_link_count(dir);
}
}
unlock_kernel();
unlock_ufs(dir->i_sb);
return err;
}

View File

@ -84,7 +84,6 @@
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/parser.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/log2.h>
@ -96,6 +95,26 @@
#include "swab.h"
#include "util.h"
void lock_ufs(struct super_block *sb)
{
#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT)
struct ufs_sb_info *sbi = UFS_SB(sb);
mutex_lock(&sbi->mutex);
sbi->mutex_owner = current;
#endif
}
void unlock_ufs(struct super_block *sb)
{
#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT)
struct ufs_sb_info *sbi = UFS_SB(sb);
sbi->mutex_owner = NULL;
mutex_unlock(&sbi->mutex);
#endif
}
static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
{
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
@ -313,7 +332,6 @@ void ufs_panic (struct super_block * sb, const char * function,
struct ufs_super_block_first * usb1;
va_list args;
lock_kernel();
uspi = UFS_SB(sb)->s_uspi;
usb1 = ubh_get_usb_first(uspi);
@ -521,7 +539,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
*/
size = uspi->s_cssize;
blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
base = space = kmalloc(size, GFP_KERNEL);
base = space = kmalloc(size, GFP_NOFS);
if (!base)
goto failed;
sbi->s_csp = (struct ufs_csum *)space;
@ -546,7 +564,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
* Read cylinder group (we read only first fragment from block
* at this time) and prepare internal data structures for cg caching.
*/
if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_KERNEL)))
if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_NOFS)))
goto failed;
for (i = 0; i < uspi->s_ncg; i++)
sbi->s_ucg[i] = NULL;
@ -564,7 +582,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data);
}
for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) {
if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_KERNEL)))
if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_NOFS)))
goto failed;
sbi->s_cgno[i] = UFS_CGNO_EMPTY;
}
@ -646,8 +664,6 @@ static void ufs_put_super_internal(struct super_block *sb)
UFSD("ENTER\n");
lock_kernel();
ufs_put_cstotal(sb);
size = uspi->s_cssize;
blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
@ -676,8 +692,6 @@ static void ufs_put_super_internal(struct super_block *sb)
kfree (sbi->s_ucg);
kfree (base);
unlock_kernel();
UFSD("EXIT\n");
}
@ -696,8 +710,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
unsigned maxsymlen;
int ret = -EINVAL;
lock_kernel();
uspi = NULL;
ubh = NULL;
flags = 0;
@ -718,6 +730,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
goto failed;
}
#endif
mutex_init(&sbi->mutex);
/*
* Set default mount options
* Parse mount options
@ -1165,7 +1178,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
goto failed;
UFSD("EXIT\n");
unlock_kernel();
return 0;
dalloc_failed:
@ -1177,12 +1189,10 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
kfree(sbi);
sb->s_fs_info = NULL;
UFSD("EXIT (FAILED)\n");
unlock_kernel();
return ret;
failed_nomem:
UFSD("EXIT (NOMEM)\n");
unlock_kernel();
return -ENOMEM;
}
@ -1193,8 +1203,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
struct ufs_super_block_third * usb3;
unsigned flags;
lock_ufs(sb);
lock_super(sb);
lock_kernel();
UFSD("ENTER\n");
@ -1213,8 +1223,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
sb->s_dirt = 0;
UFSD("EXIT\n");
unlock_kernel();
unlock_super(sb);
unlock_ufs(sb);
return 0;
}
@ -1256,7 +1266,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
unsigned new_mount_opt, ufstype;
unsigned flags;
lock_kernel();
lock_ufs(sb);
lock_super(sb);
uspi = UFS_SB(sb)->s_uspi;
flags = UFS_SB(sb)->s_flags;
@ -1272,7 +1282,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufs_set_opt (new_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options (data, &new_mount_opt)) {
unlock_super(sb);
unlock_kernel();
unlock_ufs(sb);
return -EINVAL;
}
if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
@ -1280,14 +1290,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
printk("ufstype can't be changed during remount\n");
unlock_super(sb);
unlock_kernel();
unlock_ufs(sb);
return -EINVAL;
}
if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
UFS_SB(sb)->s_mount_opt = new_mount_opt;
unlock_super(sb);
unlock_kernel();
unlock_ufs(sb);
return 0;
}
@ -1313,7 +1323,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
printk("ufs was compiled with read-only support, "
"can't be mounted as read-write\n");
unlock_super(sb);
unlock_kernel();
unlock_ufs(sb);
return -EINVAL;
#else
if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
@ -1323,13 +1333,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
printk("this ufstype is read-only supported\n");
unlock_super(sb);
unlock_kernel();
unlock_ufs(sb);
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
printk("failed during remounting\n");
unlock_super(sb);
unlock_kernel();
unlock_ufs(sb);
return -EPERM;
}
sb->s_flags &= ~MS_RDONLY;
@ -1337,7 +1347,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
}
UFS_SB(sb)->s_mount_opt = new_mount_opt;
unlock_super(sb);
unlock_kernel();
unlock_ufs(sb);
return 0;
}
@ -1371,7 +1381,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct ufs_super_block_third *usb3;
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
lock_kernel();
lock_ufs(sb);
usb1 = ubh_get_usb_first(uspi);
usb2 = ubh_get_usb_second(uspi);
@ -1395,7 +1405,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
unlock_kernel();
unlock_ufs(sb);
return 0;
}
@ -1405,7 +1415,7 @@ static struct kmem_cache * ufs_inode_cachep;
static struct inode *ufs_alloc_inode(struct super_block *sb)
{
struct ufs_inode_info *ei;
ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL);
ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
ei->vfs_inode.i_version = 1;

View File

@ -40,7 +40,6 @@
#include <linux/time.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/sched.h>
@ -467,7 +466,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
lock_kernel();
while (1) {
retry = ufs_trunc_direct(inode);
retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
@ -487,7 +485,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
ufsi->i_lastfrag = DIRECT_FRAGMENT;
unlock_kernel();
mark_inode_dirty(inode);
out:
UFSD("EXIT: err %d\n", err);
@ -510,7 +507,9 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
/* XXX(truncate): truncate_setsize should be called last */
truncate_setsize(inode, attr->ia_size);
lock_ufs(inode->i_sb);
error = ufs_truncate(inode, old_i_size);
unlock_ufs(inode->i_sb);
if (error)
return error;
}

View File

@ -18,6 +18,8 @@ struct ufs_sb_info {
unsigned s_cgno[UFS_MAX_GROUP_LOADED];
unsigned short s_cg_loaded;
unsigned s_mount_opt;
struct mutex mutex;
struct task_struct *mutex_owner;
};
struct ufs_inode_info {
@ -109,7 +111,6 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
extern int ufs_write_inode (struct inode *, struct writeback_control *);
extern int ufs_sync_inode (struct inode *);
extern void ufs_evict_inode (struct inode *);
extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
/* namei.c */
@ -154,4 +155,7 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b)
return do_div(b, uspi->s_fpg);
}
extern void lock_ufs(struct super_block *sb);
extern void unlock_ufs(struct super_block *sb);
#endif /* _UFS_UFS_H */

View File

@ -27,7 +27,7 @@ struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi,
if (count > UFS_MAXFRAG)
return NULL;
ubh = (struct ufs_buffer_head *)
kmalloc (sizeof (struct ufs_buffer_head), GFP_KERNEL);
kmalloc (sizeof (struct ufs_buffer_head), GFP_NOFS);
if (!ubh)
return NULL;
ubh->fragment = fragment;