linux/drivers/mtd/mtd_blkdevs.c

614 lines
13 KiB
C
Raw Normal View History

/*
* Interface to Linux block layer for MTD 'translation layers'.
*
* Copyright © 2003-2010 David Woodhouse <dwmw2@infradead.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/fs.h>
#include <linux/mtd/blktrans.h>
#include <linux/mtd/mtd.h>
#include <linux/blkdev.h>
#include <linux/blkpg.h>
#include <linux/spinlock.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/kthread.h>
#include <asm/uaccess.h>
#include "mtdcore.h"
static LIST_HEAD(blktrans_majors);
static DEFINE_MUTEX(blktrans_ref_mutex);
static void blktrans_dev_release(struct kref *kref)
{
struct mtd_blktrans_dev *dev =
container_of(kref, struct mtd_blktrans_dev, ref);
dev->disk->private_data = NULL;
blk_cleanup_queue(dev->rq);
put_disk(dev->disk);
list_del(&dev->list);
kfree(dev);
}
static struct mtd_blktrans_dev *blktrans_dev_get(struct gendisk *disk)
{
struct mtd_blktrans_dev *dev;
mutex_lock(&blktrans_ref_mutex);
dev = disk->private_data;
if (!dev)
goto unlock;
kref_get(&dev->ref);
unlock:
mutex_unlock(&blktrans_ref_mutex);
return dev;
}
static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
{
mutex_lock(&blktrans_ref_mutex);
kref_put(&dev->ref, blktrans_dev_release);
mutex_unlock(&blktrans_ref_mutex);
}
static int do_blktrans_request(struct mtd_blktrans_ops *tr,
struct mtd_blktrans_dev *dev,
struct request *req)
{
unsigned long block, nsect;
char *buf;
block: convert to pos and nr_sectors accessors With recent cleanups, there is no place where low level driver directly manipulates request fields. This means that the 'hard' request fields always equal the !hard fields. Convert all rq->sectors, nr_sectors and current_nr_sectors references to accessors. While at it, drop superflous blk_rq_pos() < 0 test in swim.c. [ Impact: use pos and nr_sectors accessors ] Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com> Tested-by: Grant Likely <grant.likely@secretlab.ca> Acked-by: Grant Likely <grant.likely@secretlab.ca> Tested-by: Adrian McMenamin <adrian@mcmen.demon.co.uk> Acked-by: Adrian McMenamin <adrian@mcmen.demon.co.uk> Acked-by: Mike Miller <mike.miller@hp.com> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> Cc: Borislav Petkov <petkovbb@googlemail.com> Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com> Cc: Eric Moore <Eric.Moore@lsi.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Pete Zaitcev <zaitcev@redhat.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Paul Clements <paul.clements@steeleye.com> Cc: Tim Waugh <tim@cyberelk.net> Cc: Jeff Garzik <jgarzik@pobox.com> Cc: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Alex Dubov <oakad@yahoo.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Dario Ballabio <ballabio_dario@emc.com> Cc: David S. Miller <davem@davemloft.net> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: unsik Kim <donari75@gmail.com> Cc: Laurent Vivier <Laurent@lvivier.info> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
2009-05-07 21:24:39 +08:00
block = blk_rq_pos(req) << 9 >> tr->blkshift;
nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
buf = req->buffer;
if (req->cmd_type != REQ_TYPE_FS)
2009-04-23 10:05:19 +08:00
return -EIO;
block: convert to pos and nr_sectors accessors With recent cleanups, there is no place where low level driver directly manipulates request fields. This means that the 'hard' request fields always equal the !hard fields. Convert all rq->sectors, nr_sectors and current_nr_sectors references to accessors. While at it, drop superflous blk_rq_pos() < 0 test in swim.c. [ Impact: use pos and nr_sectors accessors ] Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com> Tested-by: Grant Likely <grant.likely@secretlab.ca> Acked-by: Grant Likely <grant.likely@secretlab.ca> Tested-by: Adrian McMenamin <adrian@mcmen.demon.co.uk> Acked-by: Adrian McMenamin <adrian@mcmen.demon.co.uk> Acked-by: Mike Miller <mike.miller@hp.com> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> Cc: Borislav Petkov <petkovbb@googlemail.com> Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com> Cc: Eric Moore <Eric.Moore@lsi.com> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Pete Zaitcev <zaitcev@redhat.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Paul Clements <paul.clements@steeleye.com> Cc: Tim Waugh <tim@cyberelk.net> Cc: Jeff Garzik <jgarzik@pobox.com> Cc: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Alex Dubov <oakad@yahoo.com> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Dario Ballabio <ballabio_dario@emc.com> Cc: David S. Miller <davem@davemloft.net> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: unsik Kim <donari75@gmail.com> Cc: Laurent Vivier <Laurent@lvivier.info> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
2009-05-07 21:24:39 +08:00
if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
get_capacity(req->rq_disk))
2009-04-23 10:05:19 +08:00
return -EIO;
if (req->cmd_flags & REQ_DISCARD)
return tr->discard(dev, block, nsect);
switch(rq_data_dir(req)) {
case READ:
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->readsect(dev, block, buf))
2009-04-23 10:05:19 +08:00
return -EIO;
rq_flush_dcache_pages(req);
2009-04-23 10:05:19 +08:00
return 0;
case WRITE:
if (!tr->writesect)
2009-04-23 10:05:19 +08:00
return -EIO;
rq_flush_dcache_pages(req);
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->writesect(dev, block, buf))
2009-04-23 10:05:19 +08:00
return -EIO;
return 0;
default:
printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
2009-04-23 10:05:19 +08:00
return -EIO;
}
}
int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
{
if (kthread_should_stop())
return 1;
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus-1 * 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (9356 commits) [media] rc: update for bitop name changes fs: simplify iget & friends fs: pull inode->i_lock up out of writeback_single_inode fs: rename inode_lock to inode_hash_lock fs: move i_wb_list out from under inode_lock fs: move i_sb_list out from under inode_lock fs: remove inode_lock from iput_final and prune_icache fs: Lock the inode LRU list separately fs: factor inode disposal fs: protect inode->i_state with inode->i_lock lib, arch: add filter argument to show_mem and fix private implementations SLUB: Write to per cpu data when allocating it slub: Fix debugobjects with lockless fastpath autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd() autofs4 - remove autofs4_lock autofs4 - fix d_manage() return on rcu-walk autofs4 - fix autofs4_expire_indirect() traversal autofs4 - fix dentry leak in autofs4_expire_direct() autofs4 - reinstate last used update on access vfs - check non-mountpoint dentry might block in __follow_mount_rcu() ... NOTE! This merge commit was created to fix compilation error. The block tree was merged upstream and removed the 'elv_queue_empty()' function which the new 'mtdswap' driver is using. So a simple merge of the mtd tree with upstream does not compile. And the mtd tree has already be published, so re-basing it is not an option. To fix this unfortunate situation, I had to merge upstream into the mtd-2.6.git tree without committing, put the fixup patch on top of this, and then commit this. The result is that we do not have commits which do not compile. In other words, this merge commit "merges" 3 things: the MTD tree, the upstream tree, and the fixup patch.
2011-03-25 23:41:20 +08:00
return dev->bg_stop;
}
EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
static int mtd_blktrans_thread(void *arg)
{
struct mtd_blktrans_dev *dev = arg;
struct mtd_blktrans_ops *tr = dev->tr;
struct request_queue *rq = dev->rq;
struct request *req = NULL;
int background_done = 0;
spin_lock_irq(rq->queue_lock);
while (!kthread_should_stop()) {
2009-04-23 10:05:19 +08:00
int res;
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus-1 * 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (9356 commits) [media] rc: update for bitop name changes fs: simplify iget & friends fs: pull inode->i_lock up out of writeback_single_inode fs: rename inode_lock to inode_hash_lock fs: move i_wb_list out from under inode_lock fs: move i_sb_list out from under inode_lock fs: remove inode_lock from iput_final and prune_icache fs: Lock the inode LRU list separately fs: factor inode disposal fs: protect inode->i_state with inode->i_lock lib, arch: add filter argument to show_mem and fix private implementations SLUB: Write to per cpu data when allocating it slub: Fix debugobjects with lockless fastpath autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd() autofs4 - remove autofs4_lock autofs4 - fix d_manage() return on rcu-walk autofs4 - fix autofs4_expire_indirect() traversal autofs4 - fix dentry leak in autofs4_expire_direct() autofs4 - reinstate last used update on access vfs - check non-mountpoint dentry might block in __follow_mount_rcu() ... NOTE! This merge commit was created to fix compilation error. The block tree was merged upstream and removed the 'elv_queue_empty()' function which the new 'mtdswap' driver is using. So a simple merge of the mtd tree with upstream does not compile. And the mtd tree has already be published, so re-basing it is not an option. To fix this unfortunate situation, I had to merge upstream into the mtd-2.6.git tree without committing, put the fixup patch on top of this, and then commit this. The result is that we do not have commits which do not compile. In other words, this merge commit "merges" 3 things: the MTD tree, the upstream tree, and the fixup patch.
2011-03-25 23:41:20 +08:00
dev->bg_stop = false;
block: implement and enforce request peek/start/fetch Till now block layer allowed two separate modes of request execution. A request is always acquired from the request queue via elv_next_request(). After that, drivers are free to either dequeue it or process it without dequeueing. Dequeue allows elv_next_request() to return the next request so that multiple requests can be in flight. Executing requests without dequeueing has its merits mostly in allowing drivers for simpler devices which can't do sg to deal with segments only without considering request boundary. However, the benefit this brings is dubious and declining while the cost of the API ambiguity is increasing. Segment based drivers are usually for very old or limited devices and as converting to dequeueing model isn't difficult, it doesn't justify the API overhead it puts on block layer and its more modern users. Previous patches converted all block low level drivers to dequeueing model. This patch completes the API transition by... * renaming elv_next_request() to blk_peek_request() * renaming blkdev_dequeue_request() to blk_start_request() * adding blk_fetch_request() which is combination of peek and start * disallowing completion of queued (not started) requests * applying new API to all LLDs Renamings are for consistency and to break out of tree code so that it's apparent that out of tree drivers need updating. [ Impact: block request issue API cleanup, no functional change ] Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Mike Miller <mike.miller@hp.com> Cc: unsik Kim <donari75@gmail.com> Cc: Paul Clements <paul.clements@steeleye.com> Cc: Tim Waugh <tim@cyberelk.net> Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com> Cc: David S. Miller <davem@davemloft.net> Cc: Laurent Vivier <Laurent@lvivier.info> Cc: Jeff Garzik <jgarzik@pobox.com> Cc: Jeremy Fitzhardinge <jeremy@xensource.com> Cc: Grant Likely <grant.likely@secretlab.ca> Cc: Adrian McMenamin <adrian@mcmen.demon.co.uk> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com> Cc: Borislav Petkov <petkovbb@googlemail.com> Cc: Sergei Shtylyov <sshtylyov@ru.mvista.com> Cc: Alex Dubov <oakad@yahoo.com> Cc: Pierre Ossman <drzeus@drzeus.cx> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Markus Lidel <Markus.Lidel@shadowconnect.com> Cc: Stefan Weinhuber <wein@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Pete Zaitcev <zaitcev@redhat.com> Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
2009-05-08 10:54:16 +08:00
if (!req && !(req = blk_fetch_request(rq))) {
if (tr->background && !background_done) {
spin_unlock_irq(rq->queue_lock);
mutex_lock(&dev->lock);
tr->background(dev);
mutex_unlock(&dev->lock);
spin_lock_irq(rq->queue_lock);
/*
* Do background processing just once per idle
* period.
*/
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus-1 * 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (9356 commits) [media] rc: update for bitop name changes fs: simplify iget & friends fs: pull inode->i_lock up out of writeback_single_inode fs: rename inode_lock to inode_hash_lock fs: move i_wb_list out from under inode_lock fs: move i_sb_list out from under inode_lock fs: remove inode_lock from iput_final and prune_icache fs: Lock the inode LRU list separately fs: factor inode disposal fs: protect inode->i_state with inode->i_lock lib, arch: add filter argument to show_mem and fix private implementations SLUB: Write to per cpu data when allocating it slub: Fix debugobjects with lockless fastpath autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd() autofs4 - remove autofs4_lock autofs4 - fix d_manage() return on rcu-walk autofs4 - fix autofs4_expire_indirect() traversal autofs4 - fix dentry leak in autofs4_expire_direct() autofs4 - reinstate last used update on access vfs - check non-mountpoint dentry might block in __follow_mount_rcu() ... NOTE! This merge commit was created to fix compilation error. The block tree was merged upstream and removed the 'elv_queue_empty()' function which the new 'mtdswap' driver is using. So a simple merge of the mtd tree with upstream does not compile. And the mtd tree has already be published, so re-basing it is not an option. To fix this unfortunate situation, I had to merge upstream into the mtd-2.6.git tree without committing, put the fixup patch on top of this, and then commit this. The result is that we do not have commits which do not compile. In other words, this merge commit "merges" 3 things: the MTD tree, the upstream tree, and the fixup patch.
2011-03-25 23:41:20 +08:00
background_done = !dev->bg_stop;
continue;
}
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
set_current_state(TASK_RUNNING);
spin_unlock_irq(rq->queue_lock);
schedule();
spin_lock_irq(rq->queue_lock);
continue;
}
spin_unlock_irq(rq->queue_lock);
mutex_lock(&dev->lock);
res = do_blktrans_request(dev->tr, dev, req);
mutex_unlock(&dev->lock);
spin_lock_irq(rq->queue_lock);
if (!__blk_end_request_cur(req, res))
req = NULL;
background_done = 0;
}
if (req)
__blk_end_request_all(req, -EIO);
spin_unlock_irq(rq->queue_lock);
return 0;
}
static void mtd_blktrans_request(struct request_queue *rq)
{
struct mtd_blktrans_dev *dev;
struct request *req = NULL;
dev = rq->queuedata;
if (!dev)
while ((req = blk_fetch_request(rq)) != NULL)
__blk_end_request_all(req, -ENODEV);
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus-1 * 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (9356 commits) [media] rc: update for bitop name changes fs: simplify iget & friends fs: pull inode->i_lock up out of writeback_single_inode fs: rename inode_lock to inode_hash_lock fs: move i_wb_list out from under inode_lock fs: move i_sb_list out from under inode_lock fs: remove inode_lock from iput_final and prune_icache fs: Lock the inode LRU list separately fs: factor inode disposal fs: protect inode->i_state with inode->i_lock lib, arch: add filter argument to show_mem and fix private implementations SLUB: Write to per cpu data when allocating it slub: Fix debugobjects with lockless fastpath autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd() autofs4 - remove autofs4_lock autofs4 - fix d_manage() return on rcu-walk autofs4 - fix autofs4_expire_indirect() traversal autofs4 - fix dentry leak in autofs4_expire_direct() autofs4 - reinstate last used update on access vfs - check non-mountpoint dentry might block in __follow_mount_rcu() ... NOTE! This merge commit was created to fix compilation error. The block tree was merged upstream and removed the 'elv_queue_empty()' function which the new 'mtdswap' driver is using. So a simple merge of the mtd tree with upstream does not compile. And the mtd tree has already be published, so re-basing it is not an option. To fix this unfortunate situation, I had to merge upstream into the mtd-2.6.git tree without committing, put the fixup patch on top of this, and then commit this. The result is that we do not have commits which do not compile. In other words, this merge commit "merges" 3 things: the MTD tree, the upstream tree, and the fixup patch.
2011-03-25 23:41:20 +08:00
else {
dev->bg_stop = true;
wake_up_process(dev->thread);
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus-1 * 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (9356 commits) [media] rc: update for bitop name changes fs: simplify iget & friends fs: pull inode->i_lock up out of writeback_single_inode fs: rename inode_lock to inode_hash_lock fs: move i_wb_list out from under inode_lock fs: move i_sb_list out from under inode_lock fs: remove inode_lock from iput_final and prune_icache fs: Lock the inode LRU list separately fs: factor inode disposal fs: protect inode->i_state with inode->i_lock lib, arch: add filter argument to show_mem and fix private implementations SLUB: Write to per cpu data when allocating it slub: Fix debugobjects with lockless fastpath autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd() autofs4 - remove autofs4_lock autofs4 - fix d_manage() return on rcu-walk autofs4 - fix autofs4_expire_indirect() traversal autofs4 - fix dentry leak in autofs4_expire_direct() autofs4 - reinstate last used update on access vfs - check non-mountpoint dentry might block in __follow_mount_rcu() ... NOTE! This merge commit was created to fix compilation error. The block tree was merged upstream and removed the 'elv_queue_empty()' function which the new 'mtdswap' driver is using. So a simple merge of the mtd tree with upstream does not compile. And the mtd tree has already be published, so re-basing it is not an option. To fix this unfortunate situation, I had to merge upstream into the mtd-2.6.git tree without committing, put the fixup patch on top of this, and then commit this. The result is that we do not have commits which do not compile. In other words, this merge commit "merges" 3 things: the MTD tree, the upstream tree, and the fixup patch.
2011-03-25 23:41:20 +08:00
}
}
static int blktrans_open(struct block_device *bdev, fmode_t mode)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
int ret = 0;
if (!dev)
return -ERESTARTSYS; /* FIXME: busy loop! -arnd*/
mutex_lock(&dev->lock);
mtd: mtd_blkdevs: don't increase 'open' count on error path Some error paths in mtd_blkdevs were fixed in the following commit: commit 94735ec4044a6d318b83ad3c5794e931ed168d10 mtd: mtd_blkdevs: fix error path in blktrans_open But on these error paths, the block device's `dev->open' count is already incremented before we check for errors. This meant that, while the error path was handled correctly on the first time through blktrans_open(), the device is erroneously considered already open on the second time through. This problem can be seen, for instance, when a UBI volume is simultaneously mounted as a UBIFS partition and read through its corresponding gluebi mtdblockX device. This results in blktrans_open() passing its error checks (with `dev->open > 0') without actually having a handle on the device. Here's a summarized log of the actions and results with nandsim: # modprobe nandsim # modprobe mtdblock # modprobe gluebi # modprobe ubifs # ubiattach /dev/ubi_ctrl -m 0 ... # ubimkvol /dev/ubi0 -N test -s 16MiB ... # mount -t ubifs ubi0:test /mnt # ls /dev/mtdblock* /dev/mtdblock0 /dev/mtdblock1 # cat /dev/mtdblock1 > /dev/null cat: can't open '/dev/mtdblock4': Device or resource busy # cat /dev/mtdblock1 > /dev/null CPU 0 Unable to handle kernel paging request at virtual address fffffff0, epc == 8031536c, ra == 8031f280 Oops[#1]: ... Call Trace: [<8031536c>] ubi_leb_read+0x14/0x164 [<8031f280>] gluebi_read+0xf0/0x148 [<802edba8>] mtdblock_readsect+0x64/0x198 [<802ecfe4>] mtd_blktrans_thread+0x330/0x3f4 [<8005be98>] kthread+0x88/0x90 [<8000bc04>] kernel_thread_helper+0x10/0x18 Cc: stable@kernel.org [3.0+] Signed-off-by: Brian Norris <computersforpeace@gmail.com> Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2011-11-08 07:51:05 +08:00
if (dev->open)
goto unlock;
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
kref_get(&dev->ref);
__module_get(dev->tr->owner);
if (!dev->mtd)
goto unlock;
if (dev->tr->open) {
ret = dev->tr->open(dev);
if (ret)
goto error_put;
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
}
ret = __get_mtd_device(dev->mtd);
if (ret)
goto error_release;
unlock:
mtd: mtd_blkdevs: don't increase 'open' count on error path Some error paths in mtd_blkdevs were fixed in the following commit: commit 94735ec4044a6d318b83ad3c5794e931ed168d10 mtd: mtd_blkdevs: fix error path in blktrans_open But on these error paths, the block device's `dev->open' count is already incremented before we check for errors. This meant that, while the error path was handled correctly on the first time through blktrans_open(), the device is erroneously considered already open on the second time through. This problem can be seen, for instance, when a UBI volume is simultaneously mounted as a UBIFS partition and read through its corresponding gluebi mtdblockX device. This results in blktrans_open() passing its error checks (with `dev->open > 0') without actually having a handle on the device. Here's a summarized log of the actions and results with nandsim: # modprobe nandsim # modprobe mtdblock # modprobe gluebi # modprobe ubifs # ubiattach /dev/ubi_ctrl -m 0 ... # ubimkvol /dev/ubi0 -N test -s 16MiB ... # mount -t ubifs ubi0:test /mnt # ls /dev/mtdblock* /dev/mtdblock0 /dev/mtdblock1 # cat /dev/mtdblock1 > /dev/null cat: can't open '/dev/mtdblock4': Device or resource busy # cat /dev/mtdblock1 > /dev/null CPU 0 Unable to handle kernel paging request at virtual address fffffff0, epc == 8031536c, ra == 8031f280 Oops[#1]: ... Call Trace: [<8031536c>] ubi_leb_read+0x14/0x164 [<8031f280>] gluebi_read+0xf0/0x148 [<802edba8>] mtdblock_readsect+0x64/0x198 [<802ecfe4>] mtd_blktrans_thread+0x330/0x3f4 [<8005be98>] kthread+0x88/0x90 [<8000bc04>] kernel_thread_helper+0x10/0x18 Cc: stable@kernel.org [3.0+] Signed-off-by: Brian Norris <computersforpeace@gmail.com> Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@linux.intel.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2011-11-08 07:51:05 +08:00
dev->open++;
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
error_release:
if (dev->tr->release)
dev->tr->release(dev);
error_put:
module_put(dev->tr->owner);
kref_put(&dev->ref, blktrans_dev_release);
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
}
static int blktrans_release(struct gendisk *disk, fmode_t mode)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(disk);
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
int ret = 0;
if (!dev)
return ret;
mutex_lock(&dev->lock);
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
if (--dev->open)
goto unlock;
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
kref_put(&dev->ref, blktrans_dev_release);
module_put(dev->tr->owner);
if (dev->mtd) {
ret = dev->tr->release ? dev->tr->release(dev) : 0;
__put_mtd_device(dev->mtd);
}
unlock:
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
}
static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
int ret = -ENXIO;
if (!dev)
return ret;
mutex_lock(&dev->lock);
if (!dev->mtd)
goto unlock;
ret = dev->tr->getgeo ? dev->tr->getgeo(dev, geo) : 0;
unlock:
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
}
static int blktrans_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
int ret = -ENXIO;
if (!dev)
return ret;
mutex_lock(&dev->lock);
if (!dev->mtd)
goto unlock;
switch (cmd) {
case BLKFLSBUF:
ret = dev->tr->flush ? dev->tr->flush(dev) : 0;
break;
default:
ret = -ENOTTY;
}
unlock:
mutex_unlock(&dev->lock);
blktrans_dev_put(dev);
return ret;
}
static const struct block_device_operations mtd_blktrans_ops = {
.owner = THIS_MODULE,
.open = blktrans_open,
.release = blktrans_release,
.ioctl = blktrans_ioctl,
.getgeo = blktrans_getgeo,
};
int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
struct mtd_blktrans_ops *tr = new->tr;
struct mtd_blktrans_dev *d;
int last_devnum = -1;
struct gendisk *gd;
int ret;
if (mutex_trylock(&mtd_table_mutex)) {
mutex_unlock(&mtd_table_mutex);
BUG();
}
mutex_lock(&blktrans_ref_mutex);
list_for_each_entry(d, &tr->devs, list) {
if (new->devnum == -1) {
/* Use first free number */
if (d->devnum != last_devnum+1) {
/* Found a free devnum. Plug it in here */
new->devnum = last_devnum+1;
list_add_tail(&new->list, &d->list);
goto added;
}
} else if (d->devnum == new->devnum) {
/* Required number taken */
mutex_unlock(&blktrans_ref_mutex);
return -EBUSY;
} else if (d->devnum > new->devnum) {
/* Required number was free */
list_add_tail(&new->list, &d->list);
goto added;
}
last_devnum = d->devnum;
}
ret = -EBUSY;
if (new->devnum == -1)
new->devnum = last_devnum+1;
/* Check that the device and any partitions will get valid
* minor numbers and that the disk naming code below can cope
* with this number. */
if (new->devnum > (MINORMASK >> tr->part_bits) ||
(tr->part_bits && new->devnum >= 27 * 26)) {
mutex_unlock(&blktrans_ref_mutex);
goto error1;
}
list_add_tail(&new->list, &tr->devs);
added:
mutex_unlock(&blktrans_ref_mutex);
mutex_init(&new->lock);
kref_init(&new->ref);
if (!tr->writesect)
new->readonly = 1;
/* Create gendisk */
ret = -ENOMEM;
gd = alloc_disk(1 << tr->part_bits);
if (!gd)
goto error2;
new->disk = gd;
gd->private_data = new;
gd->major = tr->major;
gd->first_minor = (new->devnum) << tr->part_bits;
gd->fops = &mtd_blktrans_ops;
if (tr->part_bits)
if (new->devnum < 26)
snprintf(gd->disk_name, sizeof(gd->disk_name),
"%s%c", tr->name, 'a' + new->devnum);
else
snprintf(gd->disk_name, sizeof(gd->disk_name),
"%s%c%c", tr->name,
'a' - 1 + new->devnum / 26,
'a' + new->devnum % 26);
else
snprintf(gd->disk_name, sizeof(gd->disk_name),
"%s%d", tr->name, new->devnum);
set_capacity(gd, (new->size * tr->blksize) >> 9);
/* Create the request queue */
spin_lock_init(&new->queue_lock);
new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock);
if (!new->rq)
goto error3;
new->rq->queuedata = new;
blk_queue_logical_block_size(new->rq, tr->blksize);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
if (tr->discard) {
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
new->rq->limits.max_discard_sectors = UINT_MAX;
}
gd->queue = new->rq;
/* Create processing thread */
/* TODO: workqueue ? */
new->thread = kthread_run(mtd_blktrans_thread, new,
"%s%d", tr->name, new->mtd->index);
if (IS_ERR(new->thread)) {
ret = PTR_ERR(new->thread);
goto error4;
}
gd->driverfs_dev = &new->mtd->dev;
if (new->readonly)
set_disk_ro(gd, 1);
add_disk(gd);
if (new->disk_attributes) {
ret = sysfs_create_group(&disk_to_dev(gd)->kobj,
new->disk_attributes);
WARN_ON(ret);
}
return 0;
error4:
blk_cleanup_queue(new->rq);
error3:
put_disk(new->disk);
error2:
list_del(&new->list);
error1:
return ret;
}
int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
{
unsigned long flags;
if (mutex_trylock(&mtd_table_mutex)) {
mutex_unlock(&mtd_table_mutex);
BUG();
}
if (old->disk_attributes)
sysfs_remove_group(&disk_to_dev(old->disk)->kobj,
old->disk_attributes);
/* Stop new requests to arrive */
del_gendisk(old->disk);
/* Stop the thread */
kthread_stop(old->thread);
/* Kill current requests */
spin_lock_irqsave(&old->queue_lock, flags);
old->rq->queuedata = NULL;
blk_start_queue(old->rq);
spin_unlock_irqrestore(&old->queue_lock, flags);
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
/* If the device is currently open, tell trans driver to close it,
then put mtd device, and don't touch it again */
mutex_lock(&old->lock);
mtd: allow to unload the mtdtrans module if its block devices aren't open Now it once again possible to remove mtdtrans module. You still need to ensure that block devices of that module aren't mounted. This is due to the fact that as long as a block device is open, it still exists, therefore if we were to allow module removal, this block device might became used again. This time in addition to code review, I also made the code pass some torture tests like module reload in a loop + read in a loop + card insert/removal all at same time. The blktrans_open/blktrans_release don't take the mtd table lock because: While device is added (that includes execution of add_mtd_blktrans_dev) the lock is already taken Now suppose the device will never be removed. In this case even if we have changes in mtd table, the entry that we need will stay exactly the same. (Note that we don't look at table at all, just following private pointer of block device). Now suppose that someone tries to remove the mtd device. This will be propagated to trans driver which _ought_ to call del_mtd_blktrans_dev which will take the per device lock, release the mtd device and set trans->mtd = NULL. >From this point on, following opens won't even be able to know anything about that mtd device (which at that point is likely not to exist) Also the same care is taken not to trip over NULL mtd pointer in blktrans_dev_release. Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com> Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
2010-10-15 23:20:43 +08:00
if (old->open) {
if (old->tr->release)
old->tr->release(old);
__put_mtd_device(old->mtd);
}
old->mtd = NULL;
mutex_unlock(&old->lock);
blktrans_dev_put(old);
return 0;
}
static void blktrans_notify_remove(struct mtd_info *mtd)
{
struct mtd_blktrans_ops *tr;
struct mtd_blktrans_dev *dev, *next;
list_for_each_entry(tr, &blktrans_majors, list)
list_for_each_entry_safe(dev, next, &tr->devs, list)
if (dev->mtd == mtd)
tr->remove_dev(dev);
}
static void blktrans_notify_add(struct mtd_info *mtd)
{
struct mtd_blktrans_ops *tr;
if (mtd->type == MTD_ABSENT)
return;
list_for_each_entry(tr, &blktrans_majors, list)
tr->add_mtd(tr, mtd);
}
static struct mtd_notifier blktrans_notifier = {
.add = blktrans_notify_add,
.remove = blktrans_notify_remove,
};
int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
{
struct mtd_info *mtd;
int ret;
/* Register the notifier if/when the first device type is
registered, to prevent the link/init ordering from fucking
us over. */
if (!blktrans_notifier.list.next)
register_mtd_user(&blktrans_notifier);
mutex_lock(&mtd_table_mutex);
ret = register_blkdev(tr->major, tr->name);
if (ret < 0) {
printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n",
tr->name, tr->major, ret);
mutex_unlock(&mtd_table_mutex);
return ret;
}
if (ret)
tr->major = ret;
tr->blkshift = ffs(tr->blksize) - 1;
INIT_LIST_HEAD(&tr->devs);
list_add(&tr->list, &blktrans_majors);
mtd_for_each_device(mtd)
if (mtd->type != MTD_ABSENT)
tr->add_mtd(tr, mtd);
mutex_unlock(&mtd_table_mutex);
return 0;
}
int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr)
{
struct mtd_blktrans_dev *dev, *next;
mutex_lock(&mtd_table_mutex);
/* Remove it from the list of active majors */
list_del(&tr->list);
list_for_each_entry_safe(dev, next, &tr->devs, list)
tr->remove_dev(dev);
unregister_blkdev(tr->major, tr->name);
mutex_unlock(&mtd_table_mutex);
BUG_ON(!list_empty(&tr->devs));
return 0;
}
static void __exit mtd_blktrans_exit(void)
{
/* No race here -- if someone's currently in register_mtd_blktrans
we're screwed anyway. */
if (blktrans_notifier.list.next)
unregister_mtd_user(&blktrans_notifier);
}
module_exit(mtd_blktrans_exit);
EXPORT_SYMBOL_GPL(register_mtd_blktrans);
EXPORT_SYMBOL_GPL(deregister_mtd_blktrans);
EXPORT_SYMBOL_GPL(add_mtd_blktrans_dev);
EXPORT_SYMBOL_GPL(del_mtd_blktrans_dev);
MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Common interface to block layer for MTD 'translation layers'");