Merge branch 'for-4.1/drivers' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "This is the block driver pull request for 4.1.  As with the core bits,
  this is a relatively slow round.  This pull request contains:

   - Various fixes and cleanups for NVMe, from Alexey Khoroshilov, Chong
     Yuan, myself, Keith Busch, and Murali Iyer.

   - Documentation and code cleanups for nbd from Markus Pargmann.

   - Change of brd maintainer to me, from Ross Zwisler.  At least the
     email doesn't bounce anymore then.

   - Two xen-blkback fixes from Tao Chen"

* 'for-4.1/drivers' of git://git.kernel.dk/linux-block: (23 commits)
  NVMe: Meta data handling through submit io ioctl
  NVMe: Add translation for block limits
  NVMe: Remove check for null
  NVMe: Fix error handling of class_create("nvme")
  xen-blkback: define pr_fmt macro to avoid the duplication of DRV_PFX
  xen-blkback: enlarge the array size of blkback name
  nbd: Return error pointer directly
  nbd: Return error code directly
  nbd: Remove fixme that was already fixed
  nbd: Restructure debugging prints
  nbd: Fix device bytesize type
  nbd: Replace kthread_create with kthread_run
  nbd: Remove kernel internal header
  Documentation: nbd: Add list of module parameters
  Documentation: nbd: Reformat to allow more documentation
  NVMe: increase depth of admin queue
  nvme: Fix PRP list calculation for non-4k system page size
  NVMe: Fix blk-mq hot cpu notification
  NVMe: embedded iod mask cleanup
  NVMe: Freeze admin queue on device failure
  ...
commit a39ef1a7c6
Linus Torvalds, 2015-04-16 22:05:27 -04:00
12 changed files with 240 additions and 300 deletions

Documentation/blockdev/nbd.txt

@@ -1,17 +1,31 @@
-                     Network Block Device (TCP version)
-
-   What is it: With this compiled in the kernel (or as a module), Linux
-   can use a remote server as one of its block devices. So every time
-   the client computer wants to read, e.g., /dev/nb0, it sends a
-   request over TCP to the server, which will reply with the data read.
-   This can be used for stations with low disk space (or even diskless)
-   to borrow disk space from another computer.
-   Unlike NFS, it is possible to put any filesystem on it, etc.
-
-   For more information, or to download the nbd-client and nbd-server
-   tools, go to http://nbd.sf.net/.
-
-   The nbd kernel module need only be installed on the client
-   system, as the nbd-server is completely in userspace. In fact,
-   the nbd-server has been successfully ported to other operating
-   systems, including Windows.
+Network Block Device (TCP version)
+==================================
+
+1) Overview
+-----------
+
+What is it: With this compiled in the kernel (or as a module), Linux
+can use a remote server as one of its block devices. So every time
+the client computer wants to read, e.g., /dev/nb0, it sends a
+request over TCP to the server, which will reply with the data read.
+This can be used for stations with low disk space (or even diskless)
+to borrow disk space from another computer.
+Unlike NFS, it is possible to put any filesystem on it, etc.
+
+For more information, or to download the nbd-client and nbd-server
+tools, go to http://nbd.sf.net/.
+
+The nbd kernel module need only be installed on the client
+system, as the nbd-server is completely in userspace. In fact,
+the nbd-server has been successfully ported to other operating
+systems, including Windows.
+
+A) NBD parameters
+-----------------
+
+max_part
+	Number of partitions per device (default: 0).
+
+nbds_max
+	Number of block devices that should be initialized (default: 16).

MAINTAINERS

@@ -8107,7 +8107,7 @@ S:	Maintained
 F:	drivers/net/wireless/rt2x00/
 
 RAMDISK RAM BLOCK DEVICE DRIVER
-M:	Nick Piggin <npiggin@kernel.dk>
+M:	Jens Axboe <axboe@kernel.dk>
 S:	Maintained
 F:	Documentation/blockdev/ramdisk.txt
 F:	drivers/block/brd.c

drivers/block/drbd/drbd_main.c

@@ -2107,13 +2107,12 @@ static int drbd_create_mempools(void)
 	if (drbd_md_io_page_pool == NULL)
 		goto Enomem;
 
-	drbd_request_mempool = mempool_create(number,
-		mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
+	drbd_request_mempool = mempool_create_slab_pool(number,
+		drbd_request_cache);
 	if (drbd_request_mempool == NULL)
 		goto Enomem;
 
-	drbd_ee_mempool = mempool_create(number,
-		mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
+	drbd_ee_mempool = mempool_create_slab_pool(number, drbd_ee_cache);
 	if (drbd_ee_mempool == NULL)
 		goto Enomem;
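
For context, mempool_create_slab_pool() is the slab-backed convenience wrapper
around the open-coded mempool_create() call being removed above, so the change
is purely a simplification. A minimal sketch of the equivalence (editor's
sketch, not part of the patch):

	/* Old form: spell out the slab alloc/free callbacks by hand. */
	drbd_request_mempool = mempool_create(number, mempool_alloc_slab,
					      mempool_free_slab, drbd_request_cache);

	/* New form: the same pool, via the slab-specific helper. */
	drbd_request_mempool = mempool_create_slab_pool(number, drbd_request_cache);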

drivers/block/drbd/drbd_req.c

@@ -52,9 +52,10 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
 {
 	struct drbd_request *req;
 
-	req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO);
+	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
 	if (!req)
 		return NULL;
+	memset(req, 0, sizeof(*req));
 
 	drbd_req_make_private_bio(req, bio_src);
 	req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;

drivers/block/nbd.c

@@ -32,28 +32,36 @@
 #include <net/sock.h>
 #include <linux/net.h>
 #include <linux/kthread.h>
+#include <linux/types.h>
 
 #include <asm/uaccess.h>
 #include <asm/types.h>
 
 #include <linux/nbd.h>
 
-#define NBD_MAGIC 0x68797548
+struct nbd_device {
+	int flags;
+	int harderror;		/* Code of hard error */
+	struct socket * sock;	/* If == NULL, device is not ready, yet */
+	int magic;
+
+	spinlock_t queue_lock;
+	struct list_head queue_head;	/* Requests waiting result */
+	struct request *active_req;
+	wait_queue_head_t active_wq;
+	struct list_head waiting_queue;	/* Requests to be sent */
+	wait_queue_head_t waiting_wq;
+
+	struct mutex tx_lock;
+	struct gendisk *disk;
+	int blksize;
+	loff_t bytesize;
+	pid_t pid; /* pid of nbd-client, if attached */
+	int xmit_timeout;
+	int disconnect; /* a disconnect has been requested by user */
+};
 
-#ifdef NDEBUG
-#define dprintk(flags, fmt...)
-#else /* NDEBUG */
-#define dprintk(flags, fmt...) do { \
-	if (debugflags & (flags)) printk(KERN_DEBUG fmt); \
-} while (0)
-#define DBG_IOCTL       0x0004
-#define DBG_INIT        0x0010
-#define DBG_EXIT        0x0020
-#define DBG_BLKDEV      0x0100
-#define DBG_RX          0x0200
-#define DBG_TX          0x0400
-static unsigned int debugflags;
-#endif /* NDEBUG */
+#define NBD_MAGIC 0x68797548
 
 static unsigned int nbds_max = 16;
 static struct nbd_device *nbd_dev;
@@ -71,25 +79,9 @@ static int max_part;
  */
 static DEFINE_SPINLOCK(nbd_lock);
 
-#ifndef NDEBUG
-static const char *ioctl_cmd_to_ascii(int cmd)
+static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
-	switch (cmd) {
-	case NBD_SET_SOCK: return "set-sock";
-	case NBD_SET_BLKSIZE: return "set-blksize";
-	case NBD_SET_SIZE: return "set-size";
-	case NBD_SET_TIMEOUT: return "set-timeout";
-	case NBD_SET_FLAGS: return "set-flags";
-	case NBD_DO_IT: return "do-it";
-	case NBD_CLEAR_SOCK: return "clear-sock";
-	case NBD_CLEAR_QUE: return "clear-que";
-	case NBD_PRINT_DEBUG: return "print-debug";
-	case NBD_SET_SIZE_BLOCKS: return "set-size-blocks";
-	case NBD_DISCONNECT: return "disconnect";
-	case BLKROSET: return "set-read-only";
-	case BLKFLSBUF: return "flush-buffer-cache";
-	}
-	return "unknown";
+	return disk_to_dev(nbd->disk);
 }
 
 static const char *nbdcmd_to_ascii(int cmd)
@@ -103,30 +95,26 @@ static const char *nbdcmd_to_ascii(int cmd)
 	}
 	return "invalid";
 }
-#endif /* NDEBUG */
 
-static void nbd_end_request(struct request *req)
+static void nbd_end_request(struct nbd_device *nbd, struct request *req)
 {
 	int error = req->errors ? -EIO : 0;
 	struct request_queue *q = req->q;
 	unsigned long flags;
 
-	dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name,
-			req, error ? "failed" : "done");
+	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", req,
+		error ? "failed" : "done");
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	__blk_end_request_all(req, error);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+/*
+ * Forcibly shutdown the socket causing all listeners to error
+ */
 static void sock_shutdown(struct nbd_device *nbd, int lock)
 {
-	/* Forcibly shutdown the socket causing all listeners
-	 * to error
-	 *
-	 * FIXME: This code is duplicated from sys_shutdown, but
-	 * there should be a more generic interface rather than
-	 * calling socket ops directly here */
 	if (lock)
 		mutex_lock(&nbd->tx_lock);
 	if (nbd->sock) {
@@ -253,17 +241,15 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 	}
 	memcpy(request.handle, &req, sizeof(req));
 
-	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
-			nbd->disk->disk_name, req,
-			nbdcmd_to_ascii(nbd_cmd(req)),
-			(unsigned long long)blk_rq_pos(req) << 9,
-			blk_rq_bytes(req));
+	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
+		req, nbdcmd_to_ascii(nbd_cmd(req)),
+		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
 	result = sock_xmit(nbd, 1, &request, sizeof(request),
 			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
 		dev_err(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
-		goto error_out;
+		return -EIO;
 	}
 
 	if (nbd_cmd(req) == NBD_CMD_WRITE) {
@@ -277,21 +263,18 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 			flags = 0;
 			if (!rq_iter_last(bvec, iter))
 				flags = MSG_MORE;
-			dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
-					nbd->disk->disk_name, req, bvec.bv_len);
+			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
+				req, bvec.bv_len);
 			result = sock_send_bvec(nbd, &bvec, flags);
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
 					result);
-				goto error_out;
+				return -EIO;
 			}
 		}
 	}
 
 	return 0;
-
-error_out:
-	return -EIO;
 }
 
 static struct request *nbd_find_request(struct nbd_device *nbd,
@@ -302,7 +285,7 @@ static struct request *nbd_find_request(struct nbd_device *nbd,
 
 	err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
 	if (unlikely(err))
-		goto out;
+		return ERR_PTR(err);
 
 	spin_lock(&nbd->queue_lock);
 	list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
@@ -314,10 +297,7 @@ static struct request *nbd_find_request(struct nbd_device *nbd,
 	}
 	spin_unlock(&nbd->queue_lock);
 
-	err = -ENOENT;
-
-out:
-	return ERR_PTR(err);
+	return ERR_PTR(-ENOENT);
 }
 
 static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
@@ -371,8 +351,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 		return req;
 	}
 
-	dprintk(DBG_RX, "%s: request %p: got reply\n",
-			nbd->disk->disk_name, req);
+	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
 	if (nbd_cmd(req) == NBD_CMD_READ) {
 		struct req_iterator iter;
 		struct bio_vec bvec;
@@ -385,8 +364,8 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 				req->errors++;
 				return req;
 			}
-			dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
-				nbd->disk->disk_name, req, bvec.bv_len);
+			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
+				req, bvec.bv_len);
 		}
 	}
 	return req;
@@ -426,7 +405,7 @@ static int nbd_do_it(struct nbd_device *nbd)
 	}
 
 	while ((req = nbd_read_stat(nbd)) != NULL)
-		nbd_end_request(req);
+		nbd_end_request(nbd, req);
 
 	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
 	nbd->pid = 0;
@@ -455,7 +434,7 @@ static void nbd_clear_que(struct nbd_device *nbd)
 				 queuelist);
 		list_del_init(&req->queuelist);
 		req->errors++;
-		nbd_end_request(req);
+		nbd_end_request(nbd, req);
 	}
 
 	while (!list_empty(&nbd->waiting_queue)) {
@@ -463,7 +442,7 @@ static void nbd_clear_que(struct nbd_device *nbd)
 				 queuelist);
 		list_del_init(&req->queuelist);
 		req->errors++;
-		nbd_end_request(req);
+		nbd_end_request(nbd, req);
 	}
 }
 
@@ -507,7 +486,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 	if (nbd_send_req(nbd, req) != 0) {
 		dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
 		req->errors++;
-		nbd_end_request(req);
+		nbd_end_request(nbd, req);
 	} else {
 		spin_lock(&nbd->queue_lock);
 		list_add_tail(&req->queuelist, &nbd->queue_head);
@@ -522,7 +501,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 
 error_out:
 	req->errors++;
-	nbd_end_request(req);
+	nbd_end_request(nbd, req);
 }
 
 static int nbd_thread(void *data)
@@ -570,18 +549,18 @@ static void do_nbd_request(struct request_queue *q)
 
 		spin_unlock_irq(q->queue_lock);
 
-		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n",
-				req->rq_disk->disk_name, req, req->cmd_type);
-
 		nbd = req->rq_disk->private_data;
 
 		BUG_ON(nbd->magic != NBD_MAGIC);
 
+		dev_dbg(nbd_to_dev(nbd), "request %p: dequeued (flags=%x)\n",
+			req, req->cmd_type);
+
 		if (unlikely(!nbd->sock)) {
 			dev_err(disk_to_dev(nbd->disk),
 				"Attempted send on closed socket\n");
 			req->errors++;
-			nbd_end_request(req);
+			nbd_end_request(nbd, req);
 			spin_lock_irq(q->queue_lock);
 			continue;
 		}
@@ -706,13 +685,13 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		else
 			blk_queue_flush(nbd->disk->queue, 0);
 
-		thread = kthread_create(nbd_thread, nbd, "%s",
-					nbd->disk->disk_name);
+		thread = kthread_run(nbd_thread, nbd, "%s",
+				     nbd->disk->disk_name);
 		if (IS_ERR(thread)) {
 			mutex_lock(&nbd->tx_lock);
 			return PTR_ERR(thread);
 		}
-		wake_up_process(thread);
+
 		error = nbd_do_it(nbd);
 		kthread_stop(thread);
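
For context, kthread_run() is the standard shorthand for kthread_create()
followed by an immediate wake_up_process(), which is why the separate wake-up
call disappears in the hunk above. A sketch of what the new call amounts to,
assuming the usual <linux/kthread.h> definition (not part of the patch):

	/* kthread_run(nbd_thread, nbd, "%s", nbd->disk->disk_name) is roughly: */
	struct task_struct *thread = kthread_create(nbd_thread, nbd, "%s",
						    nbd->disk->disk_name);
	if (!IS_ERR(thread))
		wake_up_process(thread);	/* folded into kthread_run() */
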
@@ -768,10 +747,6 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
 
 	BUG_ON(nbd->magic != NBD_MAGIC);
 
-	/* Anyone capable of this syscall can do *real bad* things */
-	dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
-			nbd->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
-
 	mutex_lock(&nbd->tx_lock);
 	error = __nbd_ioctl(bdev, nbd, cmd, arg);
 	mutex_unlock(&nbd->tx_lock);
@@ -861,7 +836,6 @@ static int __init nbd_init(void)
 	}
 
 	printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
-	dprintk(DBG_INIT, "nbd: debugflags=0x%x\n", debugflags);
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
@@ -920,7 +894,3 @@ module_param(nbds_max, int, 0444);
 MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
 module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");
-#ifndef NDEBUG
-module_param(debugflags, int, 0644);
-MODULE_PARM_DESC(debugflags, "flags for controlling debug output");
-#endif

drivers/block/nvme-core.c

@@ -44,7 +44,7 @@
 #define NVME_MINORS		(1U << MINORBITS)
 #define NVME_Q_DEPTH		1024
-#define NVME_AQ_DEPTH		64
+#define NVME_AQ_DEPTH		256
 #define SQ_SIZE(depth)		(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)		(depth * sizeof(struct nvme_completion))
 #define ADMIN_TIMEOUT		(admin_timeout * HZ)
@@ -152,6 +152,7 @@ struct nvme_cmd_info {
  */
 #define NVME_INT_PAGES		2
 #define NVME_INT_BYTES(dev)	(NVME_INT_PAGES * (dev)->page_size)
+#define NVME_INT_MASK		0x01
 
 /*
  * Will slightly overestimate the number of pages needed.  This is OK
@@ -257,7 +258,7 @@ static void *iod_get_private(struct nvme_iod *iod)
  */
 static bool iod_should_kfree(struct nvme_iod *iod)
 {
-	return (iod->private & 0x01) == 0;
+	return (iod->private & NVME_INT_MASK) == 0;
 }
 
 /* Special values must be less than 0x1000 */
@@ -301,8 +302,6 @@ static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
 static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
-	struct request *req = ctx;
-
 	u32 result = le32_to_cpup(&cqe->result);
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
@@ -311,8 +310,6 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 	if (status == NVME_SC_SUCCESS)
 		dev_warn(nvmeq->q_dmadev,
 			"async event result %08x\n", result);
-
-	blk_mq_free_hctx_request(nvmeq->hctx, req);
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -432,7 +429,6 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
 {
 	unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) :
						sizeof(struct nvme_dsm_range);
-	unsigned long mask = 0;
 	struct nvme_iod *iod;
 
 	if (rq->nr_phys_segments <= NVME_INT_PAGES &&
@@ -440,9 +436,8 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
 		struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq);
 		iod = cmd->iod;
-		mask = 0x01;
 		iod_init(iod, size, rq->nr_phys_segments,
-				(unsigned long) rq | 0x01);
+				(unsigned long) rq | NVME_INT_MASK);
 		return iod;
 	}
 
@@ -522,8 +517,6 @@ static void nvme_dif_remap(struct request *req,
 		return;
 
 	pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
-	if (!pmap)
-		return;
 
 	p = pmap;
 	virt = bip_get_seed(bip);
@@ -645,12 +638,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	struct scatterlist *sg = iod->sg;
 	int dma_len = sg_dma_len(sg);
 	u64 dma_addr = sg_dma_address(sg);
-	int offset = offset_in_page(dma_addr);
+	u32 page_size = dev->page_size;
+	int offset = dma_addr & (page_size - 1);
 	__le64 *prp_list;
 	__le64 **list = iod_list(iod);
 	dma_addr_t prp_dma;
 	int nprps, i;
-	u32 page_size = dev->page_size;
 
 	length -= (page_size - offset);
 	if (length <= 0)
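
For context, the PRP-list fix above matters when the controller page size
differs from the CPU page size: offset_in_page() masks with the kernel's
PAGE_SIZE, while PRP entries must be aligned to the device page size reported
in dev->page_size. A minimal sketch of the corrected calculation (the helper
name is illustrative, not from the driver), assuming the device page size is a
power of two as the NVMe specification requires:

	/* Offset of a DMA address within a *device* page, not a CPU page. */
	static inline u32 nvme_prp_offset(u64 dma_addr, u32 dev_page_size)
	{
		return dma_addr & (dev_page_size - 1);	/* was offset_in_page(dma_addr) */
	}
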
@@ -1028,18 +1021,19 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	struct nvme_cmd_info *cmd_info;
 	struct request *req;
 
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, false);
+	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
 	req->cmd_flags |= REQ_NO_TIMEOUT;
 	cmd_info = blk_mq_rq_to_pdu(req);
-	nvme_set_info(cmd_info, req, async_req_completion);
+	nvme_set_info(cmd_info, NULL, async_req_completion);
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = req->tag;
 
+	blk_mq_free_hctx_request(nvmeq->hctx, req);
 	return __nvme_submit_cmd(nvmeq, &c);
 }
 
@@ -1347,6 +1341,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
+	if (!nvmeq->qid && nvmeq->dev->admin_q)
+		blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+
 	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);
 
@@ -1378,8 +1375,6 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
-	if (!qid && dev->admin_q)
-		blk_mq_freeze_queue_start(dev->admin_q);
 
 	spin_lock_irq(&nvmeq->q_lock);
 	nvme_process_cq(nvmeq);
@@ -1583,6 +1578,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		dev->admin_tagset.ops = &nvme_mq_admin_ops;
 		dev->admin_tagset.nr_hw_queues = 1;
 		dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
+		dev->admin_tagset.reserved_tags = 1;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 		dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
 		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
@@ -1749,25 +1745,31 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_user_io io;
 	struct nvme_command c;
-	unsigned length, meta_len;
-	int status, i;
-	struct nvme_iod *iod, *meta_iod = NULL;
-	dma_addr_t meta_dma_addr;
-	void *meta, *uninitialized_var(meta_mem);
+	unsigned length, meta_len, prp_len;
+	int status, write;
+	struct nvme_iod *iod;
+	dma_addr_t meta_dma = 0;
+	void *meta = NULL;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
 	length = (io.nblocks + 1) << ns->lba_shift;
 	meta_len = (io.nblocks + 1) * ns->ms;
 
-	if (meta_len && ((io.metadata & 3) || !io.metadata))
+	if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
 		return -EINVAL;
+	else if (meta_len && ns->ext) {
+		length += meta_len;
+		meta_len = 0;
+	}
+
+	write = io.opcode & 1;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
 	case nvme_cmd_compare:
-		iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
+		iod = nvme_map_user_pages(dev, write, io.addr, length);
 		break;
 	default:
 		return -EINVAL;
@@ -1776,6 +1778,27 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	if (IS_ERR(iod))
 		return PTR_ERR(iod);
 
+	prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+	if (length != prp_len) {
+		status = -ENOMEM;
+		goto unmap;
+	}
+	if (meta_len) {
+		meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+						&meta_dma, GFP_KERNEL);
+		if (!meta) {
+			status = -ENOMEM;
+			goto unmap;
+		}
+		if (write) {
+			if (copy_from_user(meta, (void __user *)io.metadata,
+								meta_len)) {
+				status = -EFAULT;
+				goto unmap;
+			}
+		}
+	}
+
 	memset(&c, 0, sizeof(c));
 	c.rw.opcode = io.opcode;
 	c.rw.flags = io.flags;
@@ -1787,75 +1810,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.reftag = cpu_to_le32(io.reftag);
 	c.rw.apptag = cpu_to_le16(io.apptag);
 	c.rw.appmask = cpu_to_le16(io.appmask);
-
-	if (meta_len) {
-		meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
-								meta_len);
-		if (IS_ERR(meta_iod)) {
-			status = PTR_ERR(meta_iod);
-			meta_iod = NULL;
-			goto unmap;
-		}
-
-		meta_mem = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
-						&meta_dma_addr, GFP_KERNEL);
-		if (!meta_mem) {
-			status = -ENOMEM;
-			goto unmap;
-		}
-
-		if (io.opcode & 1) {
-			int meta_offset = 0;
-
-			for (i = 0; i < meta_iod->nents; i++) {
-				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
-						meta_iod->sg[i].offset;
-				memcpy(meta_mem + meta_offset, meta,
-						meta_iod->sg[i].length);
-				kunmap_atomic(meta);
-				meta_offset += meta_iod->sg[i].length;
-			}
-		}
-
-		c.rw.metadata = cpu_to_le64(meta_dma_addr);
-	}
-
-	length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
 	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
 	c.rw.prp2 = cpu_to_le64(iod->first_dma);
+	c.rw.metadata = cpu_to_le64(meta_dma);
 
-	if (length != (io.nblocks + 1) << ns->lba_shift)
-		status = -ENOMEM;
-	else
-		status = nvme_submit_io_cmd(dev, ns, &c, NULL);
-
-	if (meta_len) {
-		if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
-			int meta_offset = 0;
-
-			for (i = 0; i < meta_iod->nents; i++) {
-				meta = kmap_atomic(sg_page(&meta_iod->sg[i])) +
-						meta_iod->sg[i].offset;
-				memcpy(meta, meta_mem + meta_offset,
-						meta_iod->sg[i].length);
-				kunmap_atomic(meta);
-				meta_offset += meta_iod->sg[i].length;
-			}
-		}
-
-		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta_mem,
-								meta_dma_addr);
-	}
-
+	status = nvme_submit_io_cmd(dev, ns, &c, NULL);
 unmap:
-	nvme_unmap_user_pages(dev, io.opcode & 1, iod);
+	nvme_unmap_user_pages(dev, write, iod);
 	nvme_free_iod(dev, iod);
-
-	if (meta_iod) {
-		nvme_unmap_user_pages(dev, io.opcode & 1, meta_iod);
-		nvme_free_iod(dev, meta_iod);
+	if (meta) {
+		if (status == NVME_SC_SUCCESS && !write) {
+			if (copy_to_user((void __user *)io.metadata, meta,
+								meta_len))
+				status = -EFAULT;
+		}
+		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
 	}
-
 	return status;
 }
 
@@ -2018,7 +1987,8 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id;
 	dma_addr_t dma_addr;
-	int lbaf, pi_type, old_ms;
+	u8 lbaf, pi_type;
+	u16 old_ms;
 	unsigned short bs;
 
 	id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
@@ -2039,6 +2009,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
 	ns->lba_shift = id->lbaf[lbaf].ds;
 	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
 
 	/*
 	 * If identify namespace failed, use default 512 byte block size so
@@ -2055,14 +2026,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
 				ns->ms != old_ms ||
 				bs != queue_logical_block_size(disk->queue) ||
-				(ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
+				(ns->ms && ns->ext)))
 		blk_integrity_unregister(disk);
 
 	ns->pi_type = pi_type;
 	blk_queue_logical_block_size(ns->queue, bs);
 
 	if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) &&
-						!(id->flbas & NVME_NS_FLBAS_META_EXT))
+						!ns->ext)
 		nvme_init_integrity(ns);
 
 	if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
@@ -2334,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
 	dev->vwc = ctrl->vwc;
-	dev->event_limit = min(ctrl->aerl + 1, 8);
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
@@ -2881,6 +2851,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
 
 	nvme_set_irq_hints(dev);
 
+	dev->event_limit = 1;
 	return result;
 
  free_tags:
@@ -3166,8 +3137,10 @@ static int __init nvme_init(void)
 		nvme_char_major = result;
 
 	nvme_class = class_create(THIS_MODULE, "nvme");
-	if (!nvme_class)
+	if (IS_ERR(nvme_class)) {
+		result = PTR_ERR(nvme_class);
 		goto unregister_chrdev;
+	}
 
 	result = pci_register_driver(&nvme_driver);
 	if (result)

drivers/block/nvme-scsi.c

@@ -55,6 +55,7 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define VPD_SERIAL_NUMBER			0x80
 #define VPD_DEVICE_IDENTIFIERS			0x83
 #define VPD_EXTENDED_INQUIRY			0x86
+#define VPD_BLOCK_LIMITS			0xB0
 #define VPD_BLOCK_DEV_CHARACTERISTICS		0xB1
 
 /* CDB offsets */
@@ -132,9 +133,10 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define INQ_UNIT_SERIAL_NUMBER_PAGE		0x80
 #define INQ_DEVICE_IDENTIFICATION_PAGE		0x83
 #define INQ_EXTENDED_INQUIRY_DATA_PAGE		0x86
+#define INQ_BDEV_LIMITS_PAGE			0xB0
 #define INQ_BDEV_CHARACTERISTICS_PAGE		0xB1
 #define INQ_SERIAL_NUMBER_LENGTH		0x14
-#define INQ_NUM_SUPPORTED_VPD_PAGES		5
+#define INQ_NUM_SUPPORTED_VPD_PAGES		6
 #define VERSION_SPC_4				0x06
 #define ACA_UNSUPPORTED				0
 #define STANDARD_INQUIRY_LENGTH			36
@@ -747,6 +749,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
 	inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
 	inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
 	inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
+	inq_response[9] = INQ_BDEV_LIMITS_PAGE;
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
@@ -938,6 +941,25 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	return res;
 }
 
+static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 *inq_response, int alloc_len)
+{
+	__be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue));
+	__be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
+	__be32 discard_desc_count = cpu_to_be32(0x100);
+
+	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
+	inq_response[1] = VPD_BLOCK_LIMITS;
+	inq_response[3] = 0x3c; /* Page Length */
+	memcpy(&inq_response[8], &max_sectors, sizeof(u32));
+	memcpy(&inq_response[20], &max_discard, sizeof(u32));
+
+	if (max_discard)
+		memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
+
+	return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
+}
+
 static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
@@ -2268,6 +2290,10 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		case VPD_EXTENDED_INQUIRY:
 			res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
 			break;
+		case VPD_BLOCK_LIMITS:
+			res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
+							  alloc_len);
+			break;
 		case VPD_BLOCK_DEV_CHARACTERISTICS:
 			res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
 			break;

drivers/block/xen-blkback/blkback.c

@@ -34,6 +34,8 @@
  * IN THE SOFTWARE.
  */
 
+#define pr_fmt(fmt) "xen-blkback: " fmt
+
 #include <linux/spinlock.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
@@ -211,7 +213,7 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
 		else if (persistent_gnt->gnt > this->gnt)
 			new = &((*new)->rb_right);
 		else {
-			pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n");
+			pr_alert_ratelimited("trying to add a gref that's already in the tree\n");
 			return -EINVAL;
 		}
 	}
@@ -242,7 +244,7 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
 			node = node->rb_right;
 		else {
 			if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
-				pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n");
+				pr_alert_ratelimited("requesting a grant already in use\n");
 				return NULL;
 			}
 			set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
@@ -257,7 +259,7 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
 		       struct persistent_gnt *persistent_gnt)
 {
 	if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
-		pr_alert_ratelimited(DRV_PFX " freeing a grant already unused");
+		pr_alert_ratelimited("freeing a grant already unused\n");
 	set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
 	clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
 	atomic_dec(&blkif->persistent_gnt_in_use);
@@ -374,7 +376,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 	}
 
 	if (work_pending(&blkif->persistent_purge_work)) {
-		pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n");
+		pr_alert_ratelimited("Scheduled work from previous purge is still pending, cannot purge list\n");
 		return;
 	}
@@ -396,7 +398,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
 	total = num_clean;
 
-	pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
+	pr_debug("Going to purge %u persistent grants\n", num_clean);
 
 	BUG_ON(!list_empty(&blkif->persistent_purge_list));
 	root = &blkif->persistent_gnts;
@@ -428,13 +430,13 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 	 * with the requested num
 	 */
 	if (!scan_used && !clean_used) {
-		pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
+		pr_debug("Still missing %u purged frames\n", num_clean);
 		scan_used = true;
 		goto purge_list;
 	}
 finished:
 	if (!clean_used) {
-		pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n");
+		pr_debug("Finished scanning for grants to clean, removing used flag\n");
 		clean_used = true;
 		goto purge_list;
 	}
@@ -444,7 +446,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
 	/* We can defer this work */
 	schedule_work(&blkif->persistent_purge_work);
-	pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
+	pr_debug("Purged %u/%u\n", (total - num_clean), total);
 	return;
 }
 
@@ -520,20 +522,20 @@ static void xen_vbd_resize(struct xen_blkif *blkif)
 	struct xenbus_device *dev = xen_blkbk_xenbus(blkif->be);
 	unsigned long long new_size = vbd_sz(vbd);
 
-	pr_info(DRV_PFX "VBD Resize: Domid: %d, Device: (%d, %d)\n",
+	pr_info("VBD Resize: Domid: %d, Device: (%d, %d)\n",
 		blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice));
-	pr_info(DRV_PFX "VBD Resize: new size %llu\n", new_size);
+	pr_info("VBD Resize: new size %llu\n", new_size);
 	vbd->size = new_size;
 again:
 	err = xenbus_transaction_start(&xbt);
 	if (err) {
-		pr_warn(DRV_PFX "Error starting transaction");
+		pr_warn("Error starting transaction\n");
 		return;
 	}
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(vbd));
 	if (err) {
-		pr_warn(DRV_PFX "Error writing new size");
+		pr_warn("Error writing new size\n");
 		goto abort;
 	}
 	/*
@@ -543,7 +545,7 @@ static void xen_vbd_resize(struct xen_blkif *blkif)
 	 */
 	err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
 	if (err) {
-		pr_warn(DRV_PFX "Error writing the state");
+		pr_warn("Error writing the state\n");
 		goto abort;
 	}
 
@@ -551,7 +553,7 @@ static void xen_vbd_resize(struct xen_blkif *blkif)
 	if (err == -EAGAIN)
 		goto again;
 	if (err)
-		pr_warn(DRV_PFX "Error ending transaction");
+		pr_warn("Error ending transaction\n");
 	return;
 abort:
 	xenbus_transaction_end(xbt, 1);
@@ -578,7 +580,7 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 
 static void print_stats(struct xen_blkif *blkif)
 {
-	pr_info("xen-blkback (%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
+	pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
 		 " | ds %4llu | pg: %4u/%4d\n",
 		 current->comm, blkif->st_oo_req,
 		 blkif->st_rd_req, blkif->st_wr_req,
@@ -855,7 +857,7 @@ static int xen_blkbk_map(struct xen_blkif *blkif,
 		/* This is a newly mapped grant */
 		BUG_ON(new_map_idx >= segs_to_map);
 		if (unlikely(map[new_map_idx].status != 0)) {
-			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+			pr_debug("invalid buffer -- could not remap it\n");
 			put_free_pages(blkif, &pages[seg_idx]->page, 1);
 			pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
 			ret |= 1;
@@ -891,14 +893,14 @@ static int xen_blkbk_map(struct xen_blkif *blkif,
 				goto next;
 			}
 			pages[seg_idx]->persistent_gnt = persistent_gnt;
-			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+			pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
 				 persistent_gnt->gnt, blkif->persistent_gnt_c,
 				 xen_blkif_max_pgrants);
 			goto next;
 		}
 		if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
 			blkif->vbd.overflow_max_grants = 1;
-			pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+			pr_debug("domain %u, device %#x is using maximum number of persistent grants\n",
 				 blkif->domid, blkif->vbd.handle);
 		}
 		/*
@@ -916,7 +918,7 @@ static int xen_blkbk_map(struct xen_blkif *blkif,
 	return ret;
 
 out_of_memory:
-	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+	pr_alert("%s: out of memory\n", __func__);
 	put_free_pages(blkif, pages_to_gnt, segs_to_map);
 	return -ENOMEM;
 }
@@ -996,7 +998,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
 
 	err = xen_vbd_translate(&preq, blkif, WRITE);
 	if (err) {
-		pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n",
+		pr_warn("access denied: DISCARD [%llu->%llu] on dev=%04x\n",
 			preq.sector_number,
 			preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
 		goto fail_response;
@@ -1012,7 +1014,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
 				   GFP_KERNEL, secure);
 fail_response:
 	if (err == -EOPNOTSUPP) {
-		pr_debug(DRV_PFX "discard op failed, not supported\n");
+		pr_debug("discard op failed, not supported\n");
 		status = BLKIF_RSP_EOPNOTSUPP;
 	} else if (err)
 		status = BLKIF_RSP_ERROR;
@@ -1056,16 +1058,16 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	/* An error fails the entire request. */
 	if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
 	    (error == -EOPNOTSUPP)) {
-		pr_debug(DRV_PFX "flush diskcache op failed, not supported\n");
+		pr_debug("flush diskcache op failed, not supported\n");
 		xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
 	} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
 		    (error == -EOPNOTSUPP)) {
-		pr_debug(DRV_PFX "write barrier op failed, not supported\n");
+		pr_debug("write barrier op failed, not supported\n");
 		xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
 		pending_req->status = BLKIF_RSP_EOPNOTSUPP;
 	} else if (error) {
-		pr_debug(DRV_PFX "Buffer not up-to-date at end of operation,"
+		pr_debug("Buffer not up-to-date at end of operation,"
 			 " error=%d\n", error);
 		pending_req->status = BLKIF_RSP_ERROR;
 	}
@@ -1110,7 +1112,7 @@ __do_block_io_op(struct xen_blkif *blkif)
 
 	if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
 		rc = blk_rings->common.rsp_prod_pvt;
-		pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
+		pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
 			rp, rc, rp - rc, blkif->vbd.pdevice);
 		return -EACCES;
 	}
@@ -1217,8 +1219,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	if ((req->operation == BLKIF_OP_INDIRECT) &&
 	    (req_operation != BLKIF_OP_READ) &&
 	    (req_operation != BLKIF_OP_WRITE)) {
-		pr_debug(DRV_PFX "Invalid indirect operation (%u)\n",
-			 req_operation);
+		pr_debug("Invalid indirect operation (%u)\n", req_operation);
 		goto fail_response;
 	}
 
@@ -1252,8 +1253,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	     (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
 	    unlikely((req->operation == BLKIF_OP_INDIRECT) &&
 		     (nseg > MAX_INDIRECT_SEGMENTS))) {
-		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
-			 nseg);
+		pr_debug("Bad number of segments in request (%d)\n", nseg);
 		/* Haven't submitted any bio's yet. */
 		goto fail_response;
 	}
@@ -1288,7 +1288,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	}
 
 	if (xen_vbd_translate(&preq, blkif, operation) != 0) {
-		pr_debug(DRV_PFX "access denied: %s of [%llu,%llu] on dev=%04x\n",
+		pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n",
 			 operation == READ ? "read" : "write",
 			 preq.sector_number,
 			 preq.sector_number + preq.nr_sects,
@@ -1303,7 +1303,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		if (((int)preq.sector_number|(int)seg[i].nsec) &
 		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
-			pr_debug(DRV_PFX "Misaligned I/O request from domain %d",
+			pr_debug("Misaligned I/O request from domain %d\n",
 				 blkif->domid);
 			goto fail_response;
 		}

drivers/block/xen-blkback/common.h

@@ -44,12 +44,6 @@
 #include <xen/interface/io/blkif.h>
 #include <xen/interface/io/protocols.h>
 
-#define DRV_PFX "xen-blkback:"
-#define DPRINTK(fmt, args...)				\
-	pr_debug(DRV_PFX "(%s:%d) " fmt ".\n",		\
-		 __func__, __LINE__, ##args)
-
-
 /*
  * This is the maximum number of segments that would be allowed in indirect
  * requests. This value will also be passed to the frontend.

drivers/block/xen-blkback/xenbus.c

@@ -14,6 +14,8 @@
 
 */
 
+#define pr_fmt(fmt) "xen-blkback: " fmt
+
 #include <stdarg.h>
 #include <linux/module.h>
 #include <linux/kthread.h>
@@ -21,6 +23,9 @@
 #include <xen/grant_table.h>
 #include "common.h"
 
+/* Enlarge the array size in order to fully show blkback name. */
+#define BLKBACK_NAME_LEN (20)
+
 struct backend_info {
 	struct xenbus_device *dev;
 	struct xen_blkif *blkif;
@@ -70,7 +75,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf)
 	else
 		devname = devpath;
 
-	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname);
+	snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
 	kfree(devpath);
 
 	return 0;
@@ -79,7 +84,7 @@ static int blkback_name(struct xen_blkif *blkif, char *buf)
 static void xen_update_blkif_status(struct xen_blkif *blkif)
 {
 	int err;
-	char name[TASK_COMM_LEN];
+	char name[BLKBACK_NAME_LEN];
 
 	/* Not ready to connect? */
 	if (!blkif->irq || !blkif->vbd.bdev)
@@ -424,14 +429,14 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 				 FMODE_READ : FMODE_WRITE, NULL);
 
 	if (IS_ERR(bdev)) {
-		DPRINTK("xen_vbd_create: device %08x could not be opened.\n",
+		pr_warn("xen_vbd_create: device %08x could not be opened\n",
 			vbd->pdevice);
 		return -ENOENT;
 	}
 
 	vbd->bdev = bdev;
 	if (vbd->bdev->bd_disk == NULL) {
-		DPRINTK("xen_vbd_create: device %08x doesn't exist.\n",
+		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
 			vbd->pdevice);
 		xen_vbd_free(vbd);
 		return -ENOENT;
@@ -450,7 +455,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 	if (q && blk_queue_secdiscard(q))
 		vbd->discard_secure = true;
 
-	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
 		handle, blkif->domid);
 	return 0;
 }
@@ -458,7 +463,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 {
 	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
-	DPRINTK("");
+	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
 
 	if (be->major || be->minor)
 		xenvbd_sysfs_delif(dev);
@@ -564,6 +569,10 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
 	int err;
 	struct backend_info *be = kzalloc(sizeof(struct backend_info),
 					  GFP_KERNEL);
+
+	/* match the pr_debug in xen_blkbk_remove */
+	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
+
 	if (!be) {
 		xenbus_dev_fatal(dev, -ENOMEM,
 				 "allocating backend structure");
@@ -595,7 +604,7 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
 	return 0;
 
 fail:
-	DPRINTK("failed");
+	pr_warn("%s failed\n", __func__);
 	xen_blkbk_remove(dev);
 	return err;
 }
@@ -619,7 +628,7 @@ static void backend_changed(struct xenbus_watch *watch,
 	unsigned long handle;
 	char *device_type;
 
-	DPRINTK("");
+	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
 
 	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
 			   &major, &minor);
@@ -638,7 +647,7 @@ static void backend_changed(struct xenbus_watch *watch,
 
 	if (be->major | be->minor) {
 		if (be->major != major || be->minor != minor)
-			pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n",
+			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
 				be->major, be->minor, major, minor);
 		return;
 	}
@@ -699,13 +708,12 @@ static void frontend_changed(struct xenbus_device *dev,
 	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
-	DPRINTK("%s", xenbus_strstate(frontend_state));
+	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));
 
 	switch (frontend_state) {
 	case XenbusStateInitialising:
 		if (dev->state == XenbusStateClosed) {
-			pr_info(DRV_PFX "%s: prepare for reconnect\n",
-				dev->nodename);
+			pr_info("%s: prepare for reconnect\n", dev->nodename);
 			xenbus_switch_state(dev, XenbusStateInitWait);
 		}
 		break;
@@ -772,7 +780,7 @@ static void connect(struct backend_info *be)
 	int err;
 	struct xenbus_device *dev = be->dev;
 
-	DPRINTK("%s", dev->otherend);
+	pr_debug("%s %s\n", __func__, dev->otherend);
 
 	/* Supply the information about the device the frontend needs */
 again:
@@ -858,7 +866,7 @@ static int connect_ring(struct backend_info *be)
 	char protocol[64] = "";
 	int err;
 
-	DPRINTK("%s", dev->otherend);
+	pr_debug("%s %s\n", __func__, dev->otherend);
 
 	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
 			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
@@ -893,7 +901,7 @@ static int connect_ring(struct backend_info *be)
 	be->blkif->vbd.feature_gnt_persistent = pers_grants;
 	be->blkif->vbd.overflow_max_grants = 0;
 
-	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+	pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
 		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
 		pers_grants ? "persistent grants" : "");

include/linux/nbd.h

@@ -1,46 +0,0 @@
-/*
- * 1999 Copyright (C) Pavel Machek, pavel@ucw.cz. This code is GPL.
- * 1999/11/04 Copyright (C) 1999 VMware, Inc. (Regis "HPReg" Duchesne)
- *            Made nbd_end_request() use the io_request_lock
- * 2001 Copyright (C) Steven Whitehouse
- *            New nbd_end_request() for compatibility with new linux block
- *            layer code.
- * 2003/06/24 Louis D. Langholtz <ldl@aros.net>
- *            Removed unneeded blksize_bits field from nbd_device struct.
- *            Cleanup PARANOIA usage & code.
- * 2004/02/19 Paul Clements
- *            Removed PARANOIA, plus various cleanup and comments
- */
-#ifndef LINUX_NBD_H
-#define LINUX_NBD_H
-
-#include <linux/wait.h>
-#include <linux/mutex.h>
-#include <uapi/linux/nbd.h>
-
-struct request;
-
-struct nbd_device {
-	int flags;
-	int harderror;		/* Code of hard error */
-	struct socket * sock;	/* If == NULL, device is not ready, yet */
-	int magic;
-
-	spinlock_t queue_lock;
-	struct list_head queue_head;	/* Requests waiting result */
-	struct request *active_req;
-	wait_queue_head_t active_wq;
-	struct list_head waiting_queue;	/* Requests to be sent */
-	wait_queue_head_t waiting_wq;
-
-	struct mutex tx_lock;
-	struct gendisk *disk;
-	int blksize;
-	u64 bytesize;
-	pid_t pid; /* pid of nbd-client, if attached */
-	int xmit_timeout;
-	int disconnect; /* a disconnect has been requested by user */
-};
-
-#endif

include/linux/nvme.h

@@ -117,8 +117,9 @@ struct nvme_ns {
 
 	unsigned ns_id;
 	int lba_shift;
-	int ms;
-	int pi_type;
+	u16 ms;
+	bool ext;
+	u8 pi_type;
 	u64 mode_select_num_blocks;
 	u32 mode_select_block_len;
 };