From 3daa41078aedf227ec98b0d1c9d56b77b6d20153 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 18 Dec 2015 09:54:53 +0100 Subject: [PATCH 01/15] scsi: revert change to scsi_req_cancel_async and add assertions Fam Zheng noticed that the change in commit 36896bf ("scsi: always call notifier on async cancellation", 2015-12-16) could cause a leak of the request; scsi_req_cancel_async now calls scsi_req_ref multiple times for multiple cancellations, but there is only one call to scsi_req_cancel_complete. So revert the patch and instead assert that the problematic case (a call to scsi_req_cancel_async after the aiocb has been completed) cannot happen. Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-bus.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 00bddc9270..378bf4dfd1 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1759,6 +1759,15 @@ void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier) if (notifier) { notifier_list_add(&req->cancel_notifiers, notifier); } + if (req->io_canceled) { + /* A blk_aio_cancel_async is pending; when it finishes, + * scsi_req_cancel_complete will be called and will + * call the notifier we just added. Just wait for that. + */ + assert(req->aiocb); + return; + } + /* Dropped in scsi_req_cancel_complete. */ scsi_req_ref(req); scsi_req_dequeue(req); req->io_canceled = true; @@ -1775,6 +1784,8 @@ void scsi_req_cancel(SCSIRequest *req) if (!req->enqueued) { return; } + assert(!req->io_canceled); + /* Dropped in scsi_req_cancel_complete. */ scsi_req_ref(req); scsi_req_dequeue(req); req->io_canceled = true; From 76c64d33601a4948d6f72022992574a75b6fab97 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 17 Nov 2015 17:09:33 +0100 Subject: [PATCH 02/15] target-i386: do not duplicate page protection checks x86_cpu_handle_mmu_fault is currently checking twice for writability and executability of pages; the first time to decide whether to trigger a page fault, the second time to compute the "prot" argument to tlb_set_page_with_attrs. Reorganize code so that first "prot" is computed, then it is used to check whether to raise a page fault, then finally PROT_WRITE is removed if the D bit will have to be set. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target-i386/helper.c | 67 ++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 43 deletions(-) diff --git a/target-i386/helper.c b/target-i386/helper.c index d18be95c3f..6b10019e70 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -890,38 +890,30 @@ do_check_protect_pse36: goto do_fault_rsvd; } ptep ^= PG_NX_MASK; - if ((ptep & PG_NX_MASK) && is_write1 == 2) { + + /* can the page can be put in the TLB? prot will tell us */ + if (is_user && !(ptep & PG_USER_MASK)) { goto do_fault_protect; } - switch (mmu_idx) { - case MMU_USER_IDX: - if (!(ptep & PG_USER_MASK)) { - goto do_fault_protect; - } - if (is_write && !(ptep & PG_RW_MASK)) { - goto do_fault_protect; - } - break; - case MMU_KSMAP_IDX: - if (is_write1 != 2 && (ptep & PG_USER_MASK)) { - goto do_fault_protect; + prot = 0; + if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) { + prot |= PAGE_READ; + if ((ptep & PG_RW_MASK) || (!is_user && !(env->cr[0] & CR0_WP_MASK))) { + prot |= PAGE_WRITE; } - /* fall through */ - case MMU_KNOSMAP_IDX: - if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) && - (ptep & PG_USER_MASK)) { - goto do_fault_protect; - } - if ((env->cr[0] & CR0_WP_MASK) && - is_write && !(ptep & PG_RW_MASK)) { - goto do_fault_protect; - } - break; - - default: /* cannot happen */ - break; } + if (!(ptep & PG_NX_MASK) && + (mmu_idx == MMU_USER_IDX || + !((env->cr[4] & CR4_SMEP_MASK) && (ptep & PG_USER_MASK)))) { + prot |= PAGE_EXEC; + } + + if ((prot & (1 << is_write1)) == 0) { + goto do_fault_protect; + } + + /* yes, it can! */ is_dirty = is_write && !(pte & PG_DIRTY_MASK); if (!(pte & PG_ACCESSED_MASK) || is_dirty) { pte |= PG_ACCESSED_MASK; @@ -931,25 +923,13 @@ do_check_protect_pse36: x86_stl_phys_notdirty(cs, pte_addr, pte); } - /* the page can be put in the TLB */ - prot = PAGE_READ; - if (!(ptep & PG_NX_MASK) && - (mmu_idx == MMU_USER_IDX || - !((env->cr[4] & CR4_SMEP_MASK) && (ptep & PG_USER_MASK)))) { - prot |= PAGE_EXEC; - } - if (pte & PG_DIRTY_MASK) { + if (!(pte & PG_DIRTY_MASK)) { /* only set write access if already dirty... otherwise wait for dirty access */ - if (is_user) { - if (ptep & PG_RW_MASK) - prot |= PAGE_WRITE; - } else { - if (!(env->cr[0] & CR0_WP_MASK) || - (ptep & PG_RW_MASK)) - prot |= PAGE_WRITE; - } + assert(!is_write); + prot &= ~PAGE_WRITE; } + do_mapping: pte = pte & env->a20_mask; @@ -962,6 +942,7 @@ do_check_protect_pse36: page_offset = vaddr & (page_size - 1); paddr = pte + page_offset; + assert(prot & (1 << is_write1)); tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env), prot, mmu_idx, page_size); return 0; From 4c1396cb576c9b14425558b73de1584c7a9735d7 Mon Sep 17 00:00:00 2001 From: P J P Date: Fri, 18 Dec 2015 11:35:07 +0530 Subject: [PATCH 03/15] i386: avoid null pointer dereference Hello, A null pointer dereference issue was reported by Mr Ling Liu, CC'd here. It occurs while doing I/O port write operations via hmp interface. In that, 'current_cpu' remains null as it is not called from cpu_exec loop, which results in the said issue. Below is a proposed (tested)patch to fix this issue; Does it look okay? === From ae88a4947fab9a148cd794f8ad2d812e7f5a1d0f Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Fri, 18 Dec 2015 11:16:07 +0530 Subject: [PATCH] i386: avoid null pointer dereference When I/O port write operation is called from hmp interface, 'current_cpu' remains null, as it is not called from cpu_exec() loop. This leads to a null pointer dereference in vapic_write routine. Add check to avoid it. Reported-by: Ling Liu Signed-off-by: Prasad J Pandit Message-Id: Signed-off-by: Paolo Bonzini Signed-off-by: P J P --- hw/i386/kvmvapic.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index c6d34b2546..f0922da682 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -634,13 +634,18 @@ static int vapic_prepare(VAPICROMState *s) static void vapic_write(void *opaque, hwaddr addr, uint64_t data, unsigned int size) { - CPUState *cs = current_cpu; - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - hwaddr rom_paddr; VAPICROMState *s = opaque; + X86CPU *cpu; + CPUX86State *env; + hwaddr rom_paddr; - cpu_synchronize_state(cs); + if (!current_cpu) { + return; + } + + cpu_synchronize_state(current_cpu); + cpu = X86_CPU(current_cpu); + env = &cpu->env; /* * The VAPIC supports two PIO-based hypercalls, both via port 0x7E. From 36fef36b91f7ec0435215860f1458b5342ce2811 Mon Sep 17 00:00:00 2001 From: P J P Date: Mon, 21 Dec 2015 15:13:13 +0530 Subject: [PATCH 04/15] scsi: initialise info object with appropriate size While processing controller 'CTRL_GET_INFO' command, the routine 'megasas_ctrl_get_info' overflows the '&info' object size. Use its appropriate size to null initialise it. Reported-by: Qinghao Tang Signed-off-by: Prasad J Pandit Message-Id: Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini Signed-off-by: P J P --- hw/scsi/megasas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index d7dc6672ec..576f56cbf2 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -718,7 +718,7 @@ static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd) BusChild *kid; int num_pd_disks = 0; - memset(&info, 0x0, cmd->iov_size); + memset(&info, 0x0, dcmd_size); if (cmd->iov_size < dcmd_size) { trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, dcmd_size); From fca10318390dd3c2e9c9e90fbcdff0fe50188dbf Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 23 Dec 2015 21:55:58 +0200 Subject: [PATCH 05/15] vmw_pvscsi: x-disable-pcie, x-old-pci-configuration back-compat props are 2.5 specific pvscsi's x-disable-pcie and x-old-pci-configuration backward compat properties were introduced in 952970b and d5da3ef: vmw_pvscsi: Introduce 'x-old-pci-configuration' backword compatability property vmw_pvscsi: Introduce 'x-disable-pcie' backword compatability property and were placed into HW_COMPAT_2_4. However since these commits were pulled post v2.5, move them to HW_COMPAT_2_5. Signed-off-by: Shmulik Ladkani Message-Id: <1450900558-20113-1-git-send-email-shmulik.ladkani@ravellosystems.com> Signed-off-by: Paolo Bonzini --- include/hw/compat.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/include/hw/compat.h b/include/hw/compat.h index 98df0dd7b5..491884ba49 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -2,6 +2,15 @@ #define HW_COMPAT_H #define HW_COMPAT_2_5 \ + {\ + .driver = "pvscsi",\ + .property = "x-old-pci-configuration",\ + .value = "on",\ + },{\ + .driver = "pvscsi",\ + .property = "x-disable-pcie",\ + .value = "on",\ + },\ {\ .driver = "vmxnet3",\ .property = "x-old-msi-offsets",\ @@ -17,14 +26,6 @@ .driver = "virtio-blk-device",\ .property = "scsi",\ .value = "true",\ - },{\ - .driver = "pvscsi",\ - .property = "x-old-pci-configuration",\ - .value = "on",\ - },{\ - .driver = "pvscsi",\ - .property = "x-disable-pcie",\ - .value = "on",\ },{\ .driver = "e1000",\ .property = "extra_mac_registers",\ From 46f296cd3a3ebc3d30e2dbc1da7c4882e3d35ce5 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 23 Dec 2015 13:59:04 +0000 Subject: [PATCH 06/15] qemu-char: delete send_all/recv_all helper methods The qemu-char.c contains two helper methods send_all and recv_all. These are in fact declared in sockets.h so ought to have been in util/qemu-sockets.c. For added fun the impl of recv_all is completely missing on Win32. Fortunately there is only a single caller of these methods, the TPM passthrough code, which is only ever compiled on Linux. With only a single caller these helpers are not compelling enough to keep so inline them in the TPM code, avoiding the need to fix the missing recv_all on Win32. Signed-off-by: Daniel P. Berrange Message-Id: <1450879144-17111-1-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- hw/tpm/tpm_passthrough.c | 29 ++++++++++++++-- include/qemu/sockets.h | 2 -- qemu-char.c | 71 ---------------------------------------- 3 files changed, 27 insertions(+), 75 deletions(-) diff --git a/hw/tpm/tpm_passthrough.c b/hw/tpm/tpm_passthrough.c index be160c19b3..ab526db0bf 100644 --- a/hw/tpm/tpm_passthrough.c +++ b/hw/tpm/tpm_passthrough.c @@ -83,12 +83,37 @@ static void tpm_passthrough_cancel_cmd(TPMBackend *tb); static int tpm_passthrough_unix_write(int fd, const uint8_t *buf, uint32_t len) { - return send_all(fd, buf, len); + int ret, remain; + + remain = len; + while (len > 0) { + ret = write(fd, buf, remain); + if (ret < 0) { + if (errno != EINTR && errno != EAGAIN) { + return -1; + } + } else if (ret == 0) { + break; + } else { + buf += ret; + remain -= ret; + } + } + return len - remain; } static int tpm_passthrough_unix_read(int fd, uint8_t *buf, uint32_t len) { - return recv_all(fd, buf, len, true); + int ret; + reread: + ret = read(fd, buf, len); + if (ret < 0) { + if (errno != EINTR && errno != EAGAIN) { + return -1; + } + goto reread; + } + return ret; } static uint32_t tpm_passthrough_get_size_from_buffer(const uint8_t *buf) diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 2e7f98518e..c1a81fa619 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -40,8 +40,6 @@ int socket_set_nodelay(int fd); void qemu_set_block(int fd); void qemu_set_nonblock(int fd); int socket_set_fast_reuse(int fd); -int send_all(int fd, const void *buf, int len1); -int recv_all(int fd, void *buf, int len1, bool single_read); #ifdef WIN32 /* Windows has different names for the same constants with the same values */ diff --git a/qemu-char.c b/qemu-char.c index 00a7526761..d7be1851e5 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -696,77 +696,6 @@ static CharDriverState *qemu_chr_open_mux(const char *id, } -#ifdef _WIN32 -int send_all(int fd, const void *buf, int len1) -{ - int ret, len; - - len = len1; - while (len > 0) { - ret = send(fd, buf, len, 0); - if (ret < 0) { - errno = WSAGetLastError(); - if (errno != WSAEWOULDBLOCK) { - return -1; - } - } else if (ret == 0) { - break; - } else { - buf += ret; - len -= ret; - } - } - return len1 - len; -} - -#else - -int send_all(int fd, const void *_buf, int len1) -{ - int ret, len; - const uint8_t *buf = _buf; - - len = len1; - while (len > 0) { - ret = write(fd, buf, len); - if (ret < 0) { - if (errno != EINTR && errno != EAGAIN) - return -1; - } else if (ret == 0) { - break; - } else { - buf += ret; - len -= ret; - } - } - return len1 - len; -} - -int recv_all(int fd, void *_buf, int len1, bool single_read) -{ - int ret, len; - uint8_t *buf = _buf; - - len = len1; - while ((len > 0) && (ret = read(fd, buf, len)) != 0) { - if (ret < 0) { - if (errno != EINTR && errno != EAGAIN) { - return -1; - } - continue; - } else { - if (single_read) { - return ret; - } - buf += ret; - len -= ret; - } - } - return len1 - len; -} - -#endif /* !_WIN32 */ - typedef struct IOWatchPoll { GSource parent; From 1cb6d137ffd380c458be2da24a58404708c0db55 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 29 Dec 2015 11:32:14 +0800 Subject: [PATCH 07/15] iscsi: send readcapacity10 when readcapacity16 failed When play with Dell MD3000 target, for sure it is a TYPE_DISK, but readcapacity16 would fail. Then we find that readcapacity10 succeeded. It looks like the target just support readcapacity10 even through it is a TYPE_DISK or have some TYPE_ROM characteristics. This patch can give a chance to send readcapacity16 when readcapacity10 failed. This patch is not harmful to original pathes Signed-off-by: Zhu Lingshan Message-Id: <1451359934-9236-1-git-send-email-lszhu@suse.com> [Don't fall through on UNIT ATTENTION. - Paolo] Signed-off-by: Paolo Bonzini --- block/iscsi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/iscsi.c b/block/iscsi.c index eb28ddcac3..3acb052b1f 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1243,8 +1243,13 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) iscsilun->lbprz = !!rc16->lbprz; iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff); } + break; } - break; + if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { + break; + } + /* Fall through and try READ CAPACITY(10) instead. */ case TYPE_ROM: task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0); if (task != NULL && task->status == SCSI_STATUS_GOOD) { From e1dc68155cafabfd6a065391f7826d5d0992b46e Mon Sep 17 00:00:00 2001 From: Cao jin Date: Wed, 6 Jan 2016 17:37:46 +0800 Subject: [PATCH 08/15] SCSI device: fix to incomplete QOMify Signed-off-by: Cao jin Acked-by: Michael S. Tsirkin Message-Id: <1452073066-28319-1-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 12 ++++++------ hw/scsi/scsi-bus.c | 4 ++-- hw/scsi/virtio-scsi.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 576f56cbf2..60c0e6cd08 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -744,7 +744,7 @@ static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd) info.device.type = MFI_INFO_DEV_SAS3G; info.device.port_count = 8; QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { - SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + SCSIDevice *sdev = SCSI_DEVICE(kid->child); uint16_t pd_id; if (num_pd_disks < 8) { @@ -960,7 +960,7 @@ static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd) max_pd_disks = MFI_MAX_SYS_PDS; } QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { - SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + SCSIDevice *sdev = SCSI_DEVICE(kid->child); uint16_t pd_id; if (num_pd_disks >= max_pd_disks) @@ -1136,7 +1136,7 @@ static int megasas_dcmd_ld_get_list(MegasasState *s, MegasasCmd *cmd) max_ld_disks = MFI_MAX_LD; } QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { - SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + SCSIDevice *sdev = SCSI_DEVICE(kid->child); if (num_ld_disks >= max_ld_disks) { break; @@ -1187,7 +1187,7 @@ static int megasas_dcmd_ld_list_query(MegasasState *s, MegasasCmd *cmd) max_ld_disks = MFI_MAX_LD; } QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { - SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + SCSIDevice *sdev = SCSI_DEVICE(kid->child); if (num_ld_disks >= max_ld_disks) { break; @@ -1327,7 +1327,7 @@ static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd) ld_offset = array_offset + sizeof(struct mfi_array) * num_pd_disks; QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { - SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + SCSIDevice *sdev = SCSI_DEVICE(kid->child); uint16_t sdev_id = ((sdev->id & 0xFF) << 8) | (sdev->lun & 0xFF); struct mfi_array *array; struct mfi_ld_config *ld; @@ -2237,7 +2237,7 @@ static void megasas_soft_reset(MegasasState *s) * after the initial reset. */ QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { - SCSIDevice *sdev = DO_UPCAST(SCSIDevice, qdev, kid->child); + SCSIDevice *sdev = SCSI_DEVICE(kid->child); sdev->unit_attention = SENSE_CODE(NO_SENSE); scsi_device_unit_attention_reported(sdev); diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 378bf4dfd1..164af87408 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -1861,7 +1861,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) static char *scsibus_get_dev_path(DeviceState *dev) { - SCSIDevice *d = DO_UPCAST(SCSIDevice, qdev, dev); + SCSIDevice *d = SCSI_DEVICE(dev); DeviceState *hba = dev->parent_bus->parent; char *id; char *path; @@ -2034,7 +2034,7 @@ static void scsi_device_class_init(ObjectClass *klass, void *data) static void scsi_dev_instance_init(Object *obj) { DeviceState *dev = DEVICE(obj); - SCSIDevice *s = DO_UPCAST(SCSIDevice, qdev, dev); + SCSIDevice *s = SCSI_DEVICE(dev); device_add_bootindex_property(obj, &s->conf.bootindex, "bootindex", NULL, diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 3a4f520fbb..607593cc96 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -352,7 +352,7 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) target = req->req.tmf.lun[1]; s->resetting++; QTAILQ_FOREACH(kid, &s->bus.qbus.children, sibling) { - d = DO_UPCAST(SCSIDevice, qdev, kid->child); + d = SCSI_DEVICE(kid->child); if (d->channel == 0 && d->id == target) { qdev_reset_all(&d->qdev); } From ee7d7aabdaea4484e069cb99c9fc54e8cb24b56f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 14 Jan 2016 16:41:01 +0800 Subject: [PATCH 09/15] nbd: Always call "close_fn" in nbd_client_new Rename the parameter "close" to "close_fn" to disambiguous with close(2). This unifies error handling paths of NBDClient allocation: nbd_client_new will shutdown the socket and call the "close_fn" callback if negotiation failed, so the caller don't need a different path than the normal close. The returned pointer is never used, make it void in preparation for the next patch. Signed-off-by: Fam Zheng Message-Id: <1452760863-25350-2-git-send-email-famz@redhat.com> Signed-off-by: Paolo Bonzini --- blockdev-nbd.c | 5 ++--- include/block/nbd.h | 3 +-- nbd.c | 11 +++++------ qemu-nbd.c | 10 +++------- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/blockdev-nbd.c b/blockdev-nbd.c index bcdd18b3f6..4a758ac314 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -27,9 +27,8 @@ static void nbd_accept(void *opaque) socklen_t addr_len = sizeof(addr); int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len); - if (fd >= 0 && !nbd_client_new(NULL, fd, nbd_client_put)) { - shutdown(fd, 2); - close(fd); + if (fd >= 0) { + nbd_client_new(NULL, fd, nbd_client_put); } } diff --git a/include/block/nbd.h b/include/block/nbd.h index 65f409d804..7eccb41da8 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -98,8 +98,7 @@ NBDExport *nbd_export_find(const char *name); void nbd_export_set_name(NBDExport *exp, const char *name); void nbd_export_close_all(void); -NBDClient *nbd_client_new(NBDExport *exp, int csock, - void (*close)(NBDClient *)); +void nbd_client_new(NBDExport *exp, int csock, void (*close_fn)(NBDClient *)); void nbd_client_get(NBDClient *client); void nbd_client_put(NBDClient *client); diff --git a/nbd.c b/nbd.c index b3d9654499..f8d0221da1 100644 --- a/nbd.c +++ b/nbd.c @@ -1475,8 +1475,7 @@ static void nbd_update_can_read(NBDClient *client) } } -NBDClient *nbd_client_new(NBDExport *exp, int csock, - void (*close)(NBDClient *)) +void nbd_client_new(NBDExport *exp, int csock, void (*close_fn)(NBDClient *)) { NBDClient *client; client = g_malloc0(sizeof(NBDClient)); @@ -1485,10 +1484,11 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock, client->sock = csock; client->can_read = true; if (nbd_send_negotiate(client)) { - g_free(client); - return NULL; + shutdown(client->sock, 2); + close_fn(client); + return; } - client->close = close; + client->close = close_fn; qemu_co_mutex_init(&client->send_lock); nbd_set_handlers(client); @@ -1496,5 +1496,4 @@ NBDClient *nbd_client_new(NBDExport *exp, int csock, QTAILQ_INSERT_TAIL(&exp->clients, client, next); nbd_export_get(exp); } - return client; } diff --git a/qemu-nbd.c b/qemu-nbd.c index a4cf847976..ede4a54d4e 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -333,13 +333,9 @@ static void nbd_accept(void *opaque) return; } - if (nbd_client_new(exp, fd, nbd_client_closed)) { - nb_fds++; - nbd_update_server_fd_handler(server_fd); - } else { - shutdown(fd, 2); - close(fd); - } + nb_fds++; + nbd_update_server_fd_handler(server_fd); + nbd_client_new(exp, fd, nbd_client_closed); } static void nbd_update_server_fd_handler(int fd) From 798bfe00063ceaa90aa2bf6e4e5c569c80fb4e92 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 14 Jan 2016 16:41:02 +0800 Subject: [PATCH 10/15] nbd: Split nbd.c We have NBD server code and client code, all mixed in a file. Now split them into separate files under nbd/, and update MAINTAINERS. filter_nbd for iotest 083 is updated to keep the log filtered out. Signed-off-by: Fam Zheng Message-Id: <1452760863-25350-3-git-send-email-famz@redhat.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 5 +- Makefile.objs | 3 +- nbd/Makefile.objs | 1 + nbd/client.c | 361 +++++++++++++++++++++++++++++++++ nbd/common.c | 64 ++++++ nbd/nbd-internal.h | 113 +++++++++++ nbd.c => nbd/server.c | 451 +---------------------------------------- tests/qemu-iotests/083 | 2 +- 8 files changed, 547 insertions(+), 453 deletions(-) create mode 100644 nbd/Makefile.objs create mode 100644 nbd/client.c create mode 100644 nbd/common.c create mode 100644 nbd/nbd-internal.h rename nbd.c => nbd/server.c (68%) diff --git a/MAINTAINERS b/MAINTAINERS index d8b0f36a43..8f44dca48a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1116,8 +1116,9 @@ F: net/netmap.c Network Block Device (NBD) M: Paolo Bonzini S: Odd Fixes -F: block/nbd.c -F: nbd.* +F: block/nbd* +F: nbd/ +F: include/block/nbd* F: qemu-nbd.c T: git git://github.com/bonzini/qemu.git nbd-next diff --git a/Makefile.objs b/Makefile.objs index dac2c02d9f..06b95c72d0 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -8,7 +8,8 @@ util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o # block-obj-y is code used by both qemu system emulation and qemu-img block-obj-y = async.o thread-pool.o -block-obj-y += nbd.o block.o blockjob.o +block-obj-y += nbd/ +block-obj-y += block.o blockjob.o block-obj-y += main-loop.o iohandler.o qemu-timer.o block-obj-$(CONFIG_POSIX) += aio-posix.o block-obj-$(CONFIG_WIN32) += aio-win32.o diff --git a/nbd/Makefile.objs b/nbd/Makefile.objs new file mode 100644 index 0000000000..eb3dd4461d --- /dev/null +++ b/nbd/Makefile.objs @@ -0,0 +1 @@ +block-obj-y += server.o client.o common.o diff --git a/nbd/client.c b/nbd/client.c new file mode 100644 index 0000000000..83df7ba378 --- /dev/null +++ b/nbd/client.c @@ -0,0 +1,361 @@ +/* + * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device Client Side + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "nbd-internal.h" + +static int nbd_errno_to_system_errno(int err) +{ + switch (err) { + case NBD_SUCCESS: + return 0; + case NBD_EPERM: + return EPERM; + case NBD_EIO: + return EIO; + case NBD_ENOMEM: + return ENOMEM; + case NBD_ENOSPC: + return ENOSPC; + case NBD_EINVAL: + default: + return EINVAL; + } +} + +/* Definitions for opaque data types */ + +static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); + +/* That's all folks */ + +/* Basic flow for negotiation + + Server Client + Negotiate + + or + + Server Client + Negotiate #1 + Option + Negotiate #2 + + ---- + + followed by + + Server Client + Request + Response + Request + Response + ... + ... + Request (type == 2) + +*/ + +int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, + off_t *size, Error **errp) +{ + char buf[256]; + uint64_t magic, s; + uint16_t tmp; + int rc; + + TRACE("Receiving negotiation."); + + rc = -EINVAL; + + if (read_sync(csock, buf, 8) != 8) { + error_setg(errp, "Failed to read data"); + goto fail; + } + + buf[8] = '\0'; + if (strlen(buf) == 0) { + error_setg(errp, "Server connection closed unexpectedly"); + goto fail; + } + + TRACE("Magic is %c%c%c%c%c%c%c%c", + qemu_isprint(buf[0]) ? buf[0] : '.', + qemu_isprint(buf[1]) ? buf[1] : '.', + qemu_isprint(buf[2]) ? buf[2] : '.', + qemu_isprint(buf[3]) ? buf[3] : '.', + qemu_isprint(buf[4]) ? buf[4] : '.', + qemu_isprint(buf[5]) ? buf[5] : '.', + qemu_isprint(buf[6]) ? buf[6] : '.', + qemu_isprint(buf[7]) ? buf[7] : '.'); + + if (memcmp(buf, "NBDMAGIC", 8) != 0) { + error_setg(errp, "Invalid magic received"); + goto fail; + } + + if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + error_setg(errp, "Failed to read magic"); + goto fail; + } + magic = be64_to_cpu(magic); + TRACE("Magic is 0x%" PRIx64, magic); + + if (name) { + uint32_t reserved = 0; + uint32_t opt; + uint32_t namesize; + + TRACE("Checking magic (opts_magic)"); + if (magic != NBD_OPTS_MAGIC) { + if (magic == NBD_CLIENT_MAGIC) { + error_setg(errp, "Server does not support export names"); + } else { + error_setg(errp, "Bad magic received"); + } + goto fail; + } + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + error_setg(errp, "Failed to read server flags"); + goto fail; + } + *flags = be16_to_cpu(tmp) << 16; + /* reserved for future use */ + if (write_sync(csock, &reserved, sizeof(reserved)) != + sizeof(reserved)) { + error_setg(errp, "Failed to read reserved field"); + goto fail; + } + /* write the export name */ + magic = cpu_to_be64(magic); + if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + error_setg(errp, "Failed to send export name magic"); + goto fail; + } + opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); + if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { + error_setg(errp, "Failed to send export name option number"); + goto fail; + } + namesize = cpu_to_be32(strlen(name)); + if (write_sync(csock, &namesize, sizeof(namesize)) != + sizeof(namesize)) { + error_setg(errp, "Failed to send export name length"); + goto fail; + } + if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) { + error_setg(errp, "Failed to send export name"); + goto fail; + } + } else { + TRACE("Checking magic (cli_magic)"); + + if (magic != NBD_CLIENT_MAGIC) { + if (magic == NBD_OPTS_MAGIC) { + error_setg(errp, "Server requires an export name"); + } else { + error_setg(errp, "Bad magic received"); + } + goto fail; + } + } + + if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) { + error_setg(errp, "Failed to read export length"); + goto fail; + } + *size = be64_to_cpu(s); + TRACE("Size is %" PRIu64, *size); + + if (!name) { + if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) { + error_setg(errp, "Failed to read export flags"); + goto fail; + } + *flags = be32_to_cpup(flags); + } else { + if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + error_setg(errp, "Failed to read export flags"); + goto fail; + } + *flags |= be16_to_cpu(tmp); + } + if (read_sync(csock, &buf, 124) != 124) { + error_setg(errp, "Failed to read reserved block"); + goto fail; + } + rc = 0; + +fail: + return rc; +} + +#ifdef __linux__ +int nbd_init(int fd, int csock, uint32_t flags, off_t size) +{ + TRACE("Setting NBD socket"); + + if (ioctl(fd, NBD_SET_SOCK, csock) < 0) { + int serrno = errno; + LOG("Failed to set NBD socket"); + return -serrno; + } + + TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE); + + if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) { + int serrno = errno; + LOG("Failed setting NBD block size"); + return -serrno; + } + + TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE)); + + if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) { + int serrno = errno; + LOG("Failed setting size (in blocks)"); + return -serrno; + } + + if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) { + if (errno == ENOTTY) { + int read_only = (flags & NBD_FLAG_READ_ONLY) != 0; + TRACE("Setting readonly attribute"); + + if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { + int serrno = errno; + LOG("Failed setting read-only attribute"); + return -serrno; + } + } else { + int serrno = errno; + LOG("Failed setting flags"); + return -serrno; + } + } + + TRACE("Negotiation ended"); + + return 0; +} + +int nbd_client(int fd) +{ + int ret; + int serrno; + + TRACE("Doing NBD loop"); + + ret = ioctl(fd, NBD_DO_IT); + if (ret < 0 && errno == EPIPE) { + /* NBD_DO_IT normally returns EPIPE when someone has disconnected + * the socket via NBD_DISCONNECT. We do not want to return 1 in + * that case. + */ + ret = 0; + } + serrno = errno; + + TRACE("NBD loop returned %d: %s", ret, strerror(serrno)); + + TRACE("Clearing NBD queue"); + ioctl(fd, NBD_CLEAR_QUE); + + TRACE("Clearing NBD socket"); + ioctl(fd, NBD_CLEAR_SOCK); + + errno = serrno; + return ret; +} +#else +int nbd_init(int fd, int csock, uint32_t flags, off_t size) +{ + return -ENOTSUP; +} + +int nbd_client(int fd) +{ + return -ENOTSUP; +} +#endif + +ssize_t nbd_send_request(int csock, struct nbd_request *request) +{ + uint8_t buf[NBD_REQUEST_SIZE]; + ssize_t ret; + + cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); + cpu_to_be32w((uint32_t*)(buf + 4), request->type); + cpu_to_be64w((uint64_t*)(buf + 8), request->handle); + cpu_to_be64w((uint64_t*)(buf + 16), request->from); + cpu_to_be32w((uint32_t*)(buf + 24), request->len); + + TRACE("Sending request to client: " + "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}", + request->from, request->len, request->handle, request->type); + + ret = write_sync(csock, buf, sizeof(buf)); + if (ret < 0) { + return ret; + } + + if (ret != sizeof(buf)) { + LOG("writing to socket failed"); + return -EINVAL; + } + return 0; +} + +ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) +{ + uint8_t buf[NBD_REPLY_SIZE]; + uint32_t magic; + ssize_t ret; + + ret = read_sync(csock, buf, sizeof(buf)); + if (ret < 0) { + return ret; + } + + if (ret != sizeof(buf)) { + LOG("read failed"); + return -EINVAL; + } + + /* Reply + [ 0 .. 3] magic (NBD_REPLY_MAGIC) + [ 4 .. 7] error (0 == no error) + [ 7 .. 15] handle + */ + + magic = be32_to_cpup((uint32_t*)buf); + reply->error = be32_to_cpup((uint32_t*)(buf + 4)); + reply->handle = be64_to_cpup((uint64_t*)(buf + 8)); + + reply->error = nbd_errno_to_system_errno(reply->error); + + TRACE("Got reply: " + "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }", + magic, reply->error, reply->handle); + + if (magic != NBD_REPLY_MAGIC) { + LOG("invalid magic (got 0x%x)", magic); + return -EINVAL; + } + return 0; +} + diff --git a/nbd/common.c b/nbd/common.c new file mode 100644 index 0000000000..7b089b0f3b --- /dev/null +++ b/nbd/common.c @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device Common Code + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "nbd-internal.h" + +ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) +{ + size_t offset = 0; + int err; + + if (qemu_in_coroutine()) { + if (do_read) { + return qemu_co_recv(fd, buffer, size); + } else { + return qemu_co_send(fd, buffer, size); + } + } + + while (offset < size) { + ssize_t len; + + if (do_read) { + len = qemu_recv(fd, buffer + offset, size - offset, 0); + } else { + len = send(fd, buffer + offset, size - offset, 0); + } + + if (len < 0) { + err = socket_error(); + + /* recoverable error */ + if (err == EINTR || (offset > 0 && (err == EAGAIN || err == EWOULDBLOCK))) { + continue; + } + + /* unrecoverable error */ + return -err; + } + + /* eof */ + if (len == 0) { + break; + } + + offset += len; + } + + return offset; +} diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h new file mode 100644 index 0000000000..c0a657575b --- /dev/null +++ b/nbd/nbd-internal.h @@ -0,0 +1,113 @@ +/* + * NBD Internal Declarations + * + * Copyright (C) 2016 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef NBD_INTERNAL_H +#define NBD_INTERNAL_H +#include "block/nbd.h" +#include "sysemu/block-backend.h" + +#include "qemu/coroutine.h" + +#include +#include +#ifndef _WIN32 +#include +#endif +#if defined(__sun__) || defined(__HAIKU__) +#include +#endif +#include +#include + +#ifdef __linux__ +#include +#endif + +#include "qemu/sockets.h" +#include "qemu/queue.h" +#include "qemu/main-loop.h" + +/* #define DEBUG_NBD */ + +#ifdef DEBUG_NBD +#define TRACE(msg, ...) do { \ + LOG(msg, ## __VA_ARGS__); \ +} while(0) +#else +#define TRACE(msg, ...) \ + do { } while (0) +#endif + +#define LOG(msg, ...) do { \ + fprintf(stderr, "%s:%s():L%d: " msg "\n", \ + __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ +} while(0) + +/* This is all part of the "official" NBD API. + * + * The most up-to-date documentation is available at: + * https://github.com/yoe/nbd/blob/master/doc/proto.txt + */ + +#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4) +#define NBD_REPLY_SIZE (4 + 4 + 8) +#define NBD_REQUEST_MAGIC 0x25609513 +#define NBD_REPLY_MAGIC 0x67446698 +#define NBD_OPTS_MAGIC 0x49484156454F5054LL +#define NBD_CLIENT_MAGIC 0x0000420281861253LL +#define NBD_REP_MAGIC 0x3e889045565a9LL + +#define NBD_SET_SOCK _IO(0xab, 0) +#define NBD_SET_BLKSIZE _IO(0xab, 1) +#define NBD_SET_SIZE _IO(0xab, 2) +#define NBD_DO_IT _IO(0xab, 3) +#define NBD_CLEAR_SOCK _IO(0xab, 4) +#define NBD_CLEAR_QUE _IO(0xab, 5) +#define NBD_PRINT_DEBUG _IO(0xab, 6) +#define NBD_SET_SIZE_BLOCKS _IO(0xab, 7) +#define NBD_DISCONNECT _IO(0xab, 8) +#define NBD_SET_TIMEOUT _IO(0xab, 9) +#define NBD_SET_FLAGS _IO(0xab, 10) + +#define NBD_OPT_EXPORT_NAME (1) +#define NBD_OPT_ABORT (2) +#define NBD_OPT_LIST (3) + +/* NBD errors are based on errno numbers, so there is a 1:1 mapping, + * but only a limited set of errno values is specified in the protocol. + * Everything else is squashed to EINVAL. + */ +#define NBD_SUCCESS 0 +#define NBD_EPERM 1 +#define NBD_EIO 5 +#define NBD_ENOMEM 12 +#define NBD_EINVAL 22 +#define NBD_ENOSPC 28 + +static inline ssize_t read_sync(int fd, void *buffer, size_t size) +{ + /* Sockets are kept in blocking mode in the negotiation phase. After + * that, a non-readable socket simply means that another thread stole + * our request/reply. Synchronization is done with recv_coroutine, so + * that this is coroutine-safe. + */ + return nbd_wr_sync(fd, buffer, size, true); +} + +static inline ssize_t write_sync(int fd, void *buffer, size_t size) +{ + int ret; + do { + /* For writes, we do expect the socket to be writable. */ + ret = nbd_wr_sync(fd, buffer, size, false); + } while (ret == -EAGAIN); + return ret; +} + +#endif diff --git a/nbd.c b/nbd/server.c similarity index 68% rename from nbd.c rename to nbd/server.c index f8d0221da1..ba25ce3604 100644 --- a/nbd.c +++ b/nbd/server.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2005 Anthony Liguori * - * Network Block Device + * Network Block Device Server Side * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,86 +16,7 @@ * along with this program; if not, see . */ -#include "block/nbd.h" -#include "sysemu/block-backend.h" - -#include "qemu/coroutine.h" - -#include -#include -#ifndef _WIN32 -#include -#endif -#if defined(__sun__) || defined(__HAIKU__) -#include -#endif -#include -#include - -#ifdef __linux__ -#include -#endif - -#include "qemu/sockets.h" -#include "qemu/queue.h" -#include "qemu/main-loop.h" - -//#define DEBUG_NBD - -#ifdef DEBUG_NBD -#define TRACE(msg, ...) do { \ - LOG(msg, ## __VA_ARGS__); \ -} while(0) -#else -#define TRACE(msg, ...) \ - do { } while (0) -#endif - -#define LOG(msg, ...) do { \ - fprintf(stderr, "%s:%s():L%d: " msg "\n", \ - __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \ -} while(0) - -/* This is all part of the "official" NBD API. - * - * The most up-to-date documentation is available at: - * https://github.com/yoe/nbd/blob/master/doc/proto.txt - */ - -#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4) -#define NBD_REPLY_SIZE (4 + 4 + 8) -#define NBD_REQUEST_MAGIC 0x25609513 -#define NBD_REPLY_MAGIC 0x67446698 -#define NBD_OPTS_MAGIC 0x49484156454F5054LL -#define NBD_CLIENT_MAGIC 0x0000420281861253LL -#define NBD_REP_MAGIC 0x3e889045565a9LL - -#define NBD_SET_SOCK _IO(0xab, 0) -#define NBD_SET_BLKSIZE _IO(0xab, 1) -#define NBD_SET_SIZE _IO(0xab, 2) -#define NBD_DO_IT _IO(0xab, 3) -#define NBD_CLEAR_SOCK _IO(0xab, 4) -#define NBD_CLEAR_QUE _IO(0xab, 5) -#define NBD_PRINT_DEBUG _IO(0xab, 6) -#define NBD_SET_SIZE_BLOCKS _IO(0xab, 7) -#define NBD_DISCONNECT _IO(0xab, 8) -#define NBD_SET_TIMEOUT _IO(0xab, 9) -#define NBD_SET_FLAGS _IO(0xab, 10) - -#define NBD_OPT_EXPORT_NAME (1) -#define NBD_OPT_ABORT (2) -#define NBD_OPT_LIST (3) - -/* NBD errors are based on errno numbers, so there is a 1:1 mapping, - * but only a limited set of errno values is specified in the protocol. - * Everything else is squashed to EINVAL. - */ -#define NBD_SUCCESS 0 -#define NBD_EPERM 1 -#define NBD_EIO 5 -#define NBD_ENOMEM 12 -#define NBD_EINVAL 22 -#define NBD_ENOSPC 28 +#include "nbd-internal.h" static int system_errno_to_nbd_errno(int err) { @@ -120,25 +41,6 @@ static int system_errno_to_nbd_errno(int err) } } -static int nbd_errno_to_system_errno(int err) -{ - switch (err) { - case NBD_SUCCESS: - return 0; - case NBD_EPERM: - return EPERM; - case NBD_EIO: - return EIO; - case NBD_ENOMEM: - return ENOMEM; - case NBD_ENOSPC: - return ENOSPC; - case NBD_EINVAL: - default: - return EINVAL; - } -} - /* Definitions for opaque data types */ typedef struct NBDRequest NBDRequest; @@ -191,61 +93,6 @@ static void nbd_set_handlers(NBDClient *client); static void nbd_unset_handlers(NBDClient *client); static void nbd_update_can_read(NBDClient *client); -ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read) -{ - size_t offset = 0; - int err; - - if (qemu_in_coroutine()) { - if (do_read) { - return qemu_co_recv(fd, buffer, size); - } else { - return qemu_co_send(fd, buffer, size); - } - } - - while (offset < size) { - ssize_t len; - - if (do_read) { - len = qemu_recv(fd, buffer + offset, size - offset, 0); - } else { - len = send(fd, buffer + offset, size - offset, 0); - } - - if (len < 0) { - err = socket_error(); - - /* recoverable error */ - if (err == EINTR || (offset > 0 && (err == EAGAIN || err == EWOULDBLOCK))) { - continue; - } - - /* unrecoverable error */ - return -err; - } - - /* eof */ - if (len == 0) { - break; - } - - offset += len; - } - - return offset; -} - -static ssize_t read_sync(int fd, void *buffer, size_t size) -{ - /* Sockets are kept in blocking mode in the negotiation phase. After - * that, a non-readable socket simply means that another thread stole - * our request/reply. Synchronization is done with recv_coroutine, so - * that this is coroutine-safe. - */ - return nbd_wr_sync(fd, buffer, size, true); -} - static ssize_t drop_sync(int fd, size_t size) { ssize_t ret, dropped = size; @@ -266,16 +113,6 @@ static ssize_t drop_sync(int fd, size_t size) return dropped; } -static ssize_t write_sync(int fd, void *buffer, size_t size) -{ - int ret; - do { - /* For writes, we do expect the socket to be writable. */ - ret = nbd_wr_sync(fd, buffer, size, false); - } while (ret == -EAGAIN); - return ret; -} - /* Basic flow for negotiation Server Client @@ -579,188 +416,7 @@ fail: return rc; } -int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, - off_t *size, Error **errp) -{ - char buf[256]; - uint64_t magic, s; - uint16_t tmp; - int rc; - - TRACE("Receiving negotiation."); - - rc = -EINVAL; - - if (read_sync(csock, buf, 8) != 8) { - error_setg(errp, "Failed to read data"); - goto fail; - } - - buf[8] = '\0'; - if (strlen(buf) == 0) { - error_setg(errp, "Server connection closed unexpectedly"); - goto fail; - } - - TRACE("Magic is %c%c%c%c%c%c%c%c", - qemu_isprint(buf[0]) ? buf[0] : '.', - qemu_isprint(buf[1]) ? buf[1] : '.', - qemu_isprint(buf[2]) ? buf[2] : '.', - qemu_isprint(buf[3]) ? buf[3] : '.', - qemu_isprint(buf[4]) ? buf[4] : '.', - qemu_isprint(buf[5]) ? buf[5] : '.', - qemu_isprint(buf[6]) ? buf[6] : '.', - qemu_isprint(buf[7]) ? buf[7] : '.'); - - if (memcmp(buf, "NBDMAGIC", 8) != 0) { - error_setg(errp, "Invalid magic received"); - goto fail; - } - - if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to read magic"); - goto fail; - } - magic = be64_to_cpu(magic); - TRACE("Magic is 0x%" PRIx64, magic); - - if (name) { - uint32_t reserved = 0; - uint32_t opt; - uint32_t namesize; - - TRACE("Checking magic (opts_magic)"); - if (magic != NBD_OPTS_MAGIC) { - if (magic == NBD_CLIENT_MAGIC) { - error_setg(errp, "Server does not support export names"); - } else { - error_setg(errp, "Bad magic received"); - } - goto fail; - } - if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { - error_setg(errp, "Failed to read server flags"); - goto fail; - } - *flags = be16_to_cpu(tmp) << 16; - /* reserved for future use */ - if (write_sync(csock, &reserved, sizeof(reserved)) != - sizeof(reserved)) { - error_setg(errp, "Failed to read reserved field"); - goto fail; - } - /* write the export name */ - magic = cpu_to_be64(magic); - if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send export name magic"); - goto fail; - } - opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); - if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send export name option number"); - goto fail; - } - namesize = cpu_to_be32(strlen(name)); - if (write_sync(csock, &namesize, sizeof(namesize)) != - sizeof(namesize)) { - error_setg(errp, "Failed to send export name length"); - goto fail; - } - if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) { - error_setg(errp, "Failed to send export name"); - goto fail; - } - } else { - TRACE("Checking magic (cli_magic)"); - - if (magic != NBD_CLIENT_MAGIC) { - if (magic == NBD_OPTS_MAGIC) { - error_setg(errp, "Server requires an export name"); - } else { - error_setg(errp, "Bad magic received"); - } - goto fail; - } - } - - if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) { - error_setg(errp, "Failed to read export length"); - goto fail; - } - *size = be64_to_cpu(s); - TRACE("Size is %" PRIu64, *size); - - if (!name) { - if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) { - error_setg(errp, "Failed to read export flags"); - goto fail; - } - *flags = be32_to_cpup(flags); - } else { - if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { - error_setg(errp, "Failed to read export flags"); - goto fail; - } - *flags |= be16_to_cpu(tmp); - } - if (read_sync(csock, &buf, 124) != 124) { - error_setg(errp, "Failed to read reserved block"); - goto fail; - } - rc = 0; - -fail: - return rc; -} - #ifdef __linux__ -int nbd_init(int fd, int csock, uint32_t flags, off_t size) -{ - TRACE("Setting NBD socket"); - - if (ioctl(fd, NBD_SET_SOCK, csock) < 0) { - int serrno = errno; - LOG("Failed to set NBD socket"); - return -serrno; - } - - TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE); - - if (ioctl(fd, NBD_SET_BLKSIZE, (size_t)BDRV_SECTOR_SIZE) < 0) { - int serrno = errno; - LOG("Failed setting NBD block size"); - return -serrno; - } - - TRACE("Setting size to %zd block(s)", (size_t)(size / BDRV_SECTOR_SIZE)); - - if (ioctl(fd, NBD_SET_SIZE_BLOCKS, (size_t)(size / BDRV_SECTOR_SIZE)) < 0) { - int serrno = errno; - LOG("Failed setting size (in blocks)"); - return -serrno; - } - - if (ioctl(fd, NBD_SET_FLAGS, flags) < 0) { - if (errno == ENOTTY) { - int read_only = (flags & NBD_FLAG_READ_ONLY) != 0; - TRACE("Setting readonly attribute"); - - if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { - int serrno = errno; - LOG("Failed setting read-only attribute"); - return -serrno; - } - } else { - int serrno = errno; - LOG("Failed setting flags"); - return -serrno; - } - } - - TRACE("Negotiation ended"); - - return 0; -} int nbd_disconnect(int fd) { @@ -770,78 +426,14 @@ int nbd_disconnect(int fd) return 0; } -int nbd_client(int fd) -{ - int ret; - int serrno; - - TRACE("Doing NBD loop"); - - ret = ioctl(fd, NBD_DO_IT); - if (ret < 0 && errno == EPIPE) { - /* NBD_DO_IT normally returns EPIPE when someone has disconnected - * the socket via NBD_DISCONNECT. We do not want to return 1 in - * that case. - */ - ret = 0; - } - serrno = errno; - - TRACE("NBD loop returned %d: %s", ret, strerror(serrno)); - - TRACE("Clearing NBD queue"); - ioctl(fd, NBD_CLEAR_QUE); - - TRACE("Clearing NBD socket"); - ioctl(fd, NBD_CLEAR_SOCK); - - errno = serrno; - return ret; -} #else -int nbd_init(int fd, int csock, uint32_t flags, off_t size) -{ - return -ENOTSUP; -} int nbd_disconnect(int fd) { return -ENOTSUP; } - -int nbd_client(int fd) -{ - return -ENOTSUP; -} #endif -ssize_t nbd_send_request(int csock, struct nbd_request *request) -{ - uint8_t buf[NBD_REQUEST_SIZE]; - ssize_t ret; - - cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC); - cpu_to_be32w((uint32_t*)(buf + 4), request->type); - cpu_to_be64w((uint64_t*)(buf + 8), request->handle); - cpu_to_be64w((uint64_t*)(buf + 16), request->from); - cpu_to_be32w((uint32_t*)(buf + 24), request->len); - - TRACE("Sending request to client: " - "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}", - request->from, request->len, request->handle, request->type); - - ret = write_sync(csock, buf, sizeof(buf)); - if (ret < 0) { - return ret; - } - - if (ret != sizeof(buf)) { - LOG("writing to socket failed"); - return -EINVAL; - } - return 0; -} - static ssize_t nbd_receive_request(int csock, struct nbd_request *request) { uint8_t buf[NBD_REQUEST_SIZE]; @@ -883,45 +475,6 @@ static ssize_t nbd_receive_request(int csock, struct nbd_request *request) return 0; } -ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply) -{ - uint8_t buf[NBD_REPLY_SIZE]; - uint32_t magic; - ssize_t ret; - - ret = read_sync(csock, buf, sizeof(buf)); - if (ret < 0) { - return ret; - } - - if (ret != sizeof(buf)) { - LOG("read failed"); - return -EINVAL; - } - - /* Reply - [ 0 .. 3] magic (NBD_REPLY_MAGIC) - [ 4 .. 7] error (0 == no error) - [ 7 .. 15] handle - */ - - magic = be32_to_cpup((uint32_t*)buf); - reply->error = be32_to_cpup((uint32_t*)(buf + 4)); - reply->handle = be64_to_cpup((uint64_t*)(buf + 8)); - - reply->error = nbd_errno_to_system_errno(reply->error); - - TRACE("Got reply: " - "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }", - magic, reply->error, reply->handle); - - if (magic != NBD_REPLY_MAGIC) { - LOG("invalid magic (got 0x%x)", magic); - return -EINVAL; - } - return 0; -} - static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply) { uint8_t buf[NBD_REPLY_SIZE]; diff --git a/tests/qemu-iotests/083 b/tests/qemu-iotests/083 index 1b2d3f1156..566da99323 100755 --- a/tests/qemu-iotests/083 +++ b/tests/qemu-iotests/083 @@ -55,7 +55,7 @@ filter_nbd() { # callbacks sometimes, making them unreliable. # # Filter out the TCP port number since this changes between runs. - sed -e 's#^.*nbd\.c:.*##g' \ + sed -e 's#^.*nbd/.*\.c:.*##g' \ -e 's#nbd:127\.0\.0\.1:[^:]*:#nbd:127\.0\.0\.1:PORT:#g' \ -e 's#\(exportname=foo\|PORT\): Failed to .*$#\1#' } From 1a6245a5b0b4e8d822c739b403fc67c8a7bc8d12 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Thu, 14 Jan 2016 16:41:03 +0800 Subject: [PATCH 11/15] nbd-server: Coroutine based negotiation Create a coroutine in nbd_client_new, so that nbd_send_negotiate doesn't need qemu_set_block(). Handlers need to be set temporarily for csock fd in case the coroutine yields during I/O. With this, if the other end disappears in the middle of the negotiation, we don't block the whole event loop. To make the code clearer, unify all function names that belong to negotiate, so they are less likely to be misused. This is important because we rely on negotiation staying in main loop, as commented in nbd_negotiate_read/write(). Signed-off-by: Fam Zheng Message-Id: <1452760863-25350-4-git-send-email-famz@redhat.com> Signed-off-by: Paolo Bonzini --- nbd/server.c | 154 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 105 insertions(+), 49 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index ba25ce3604..8752885509 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -93,13 +93,45 @@ static void nbd_set_handlers(NBDClient *client); static void nbd_unset_handlers(NBDClient *client); static void nbd_update_can_read(NBDClient *client); -static ssize_t drop_sync(int fd, size_t size) +static void nbd_negotiate_continue(void *opaque) +{ + qemu_coroutine_enter(opaque, NULL); +} + +static ssize_t nbd_negotiate_read(int fd, void *buffer, size_t size) +{ + ssize_t ret; + + assert(qemu_in_coroutine()); + /* Negotiation are always in main loop. */ + qemu_set_fd_handler(fd, nbd_negotiate_continue, NULL, + qemu_coroutine_self()); + ret = read_sync(fd, buffer, size); + qemu_set_fd_handler(fd, NULL, NULL, NULL); + return ret; + +} + +static ssize_t nbd_negotiate_write(int fd, void *buffer, size_t size) +{ + ssize_t ret; + + assert(qemu_in_coroutine()); + /* Negotiation are always in main loop. */ + qemu_set_fd_handler(fd, NULL, nbd_negotiate_continue, + qemu_coroutine_self()); + ret = write_sync(fd, buffer, size); + qemu_set_fd_handler(fd, NULL, NULL, NULL); + return ret; +} + +static ssize_t nbd_negotiate_drop_sync(int fd, size_t size) { ssize_t ret, dropped = size; uint8_t *buffer = g_malloc(MIN(65536, size)); while (size > 0) { - ret = read_sync(fd, buffer, MIN(65536, size)); + ret = nbd_negotiate_read(fd, buffer, MIN(65536, size)); if (ret < 0) { g_free(buffer); return ret; @@ -140,96 +172,96 @@ static ssize_t drop_sync(int fd, size_t size) */ -static int nbd_send_rep(int csock, uint32_t type, uint32_t opt) +static int nbd_negotiate_send_rep(int csock, uint32_t type, uint32_t opt) { uint64_t magic; uint32_t len; magic = cpu_to_be64(NBD_REP_MAGIC); - if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + if (nbd_negotiate_write(csock, &magic, sizeof(magic)) != sizeof(magic)) { LOG("write failed (rep magic)"); return -EINVAL; } opt = cpu_to_be32(opt); - if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { + if (nbd_negotiate_write(csock, &opt, sizeof(opt)) != sizeof(opt)) { LOG("write failed (rep opt)"); return -EINVAL; } type = cpu_to_be32(type); - if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) { + if (nbd_negotiate_write(csock, &type, sizeof(type)) != sizeof(type)) { LOG("write failed (rep type)"); return -EINVAL; } len = cpu_to_be32(0); - if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) { + if (nbd_negotiate_write(csock, &len, sizeof(len)) != sizeof(len)) { LOG("write failed (rep data length)"); return -EINVAL; } return 0; } -static int nbd_send_rep_list(int csock, NBDExport *exp) +static int nbd_negotiate_send_rep_list(int csock, NBDExport *exp) { uint64_t magic, name_len; uint32_t opt, type, len; name_len = strlen(exp->name); magic = cpu_to_be64(NBD_REP_MAGIC); - if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + if (nbd_negotiate_write(csock, &magic, sizeof(magic)) != sizeof(magic)) { LOG("write failed (magic)"); return -EINVAL; } opt = cpu_to_be32(NBD_OPT_LIST); - if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) { + if (nbd_negotiate_write(csock, &opt, sizeof(opt)) != sizeof(opt)) { LOG("write failed (opt)"); return -EINVAL; } type = cpu_to_be32(NBD_REP_SERVER); - if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) { + if (nbd_negotiate_write(csock, &type, sizeof(type)) != sizeof(type)) { LOG("write failed (reply type)"); return -EINVAL; } len = cpu_to_be32(name_len + sizeof(len)); - if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) { + if (nbd_negotiate_write(csock, &len, sizeof(len)) != sizeof(len)) { LOG("write failed (length)"); return -EINVAL; } len = cpu_to_be32(name_len); - if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) { + if (nbd_negotiate_write(csock, &len, sizeof(len)) != sizeof(len)) { LOG("write failed (length)"); return -EINVAL; } - if (write_sync(csock, exp->name, name_len) != name_len) { + if (nbd_negotiate_write(csock, exp->name, name_len) != name_len) { LOG("write failed (buffer)"); return -EINVAL; } return 0; } -static int nbd_handle_list(NBDClient *client, uint32_t length) +static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) { int csock; NBDExport *exp; csock = client->sock; if (length) { - if (drop_sync(csock, length) != length) { + if (nbd_negotiate_drop_sync(csock, length) != length) { return -EIO; } - return nbd_send_rep(csock, NBD_REP_ERR_INVALID, NBD_OPT_LIST); + return nbd_negotiate_send_rep(csock, NBD_REP_ERR_INVALID, NBD_OPT_LIST); } /* For each export, send a NBD_REP_SERVER reply. */ QTAILQ_FOREACH(exp, &exports, next) { - if (nbd_send_rep_list(csock, exp)) { + if (nbd_negotiate_send_rep_list(csock, exp)) { return -EINVAL; } } /* Finish with a NBD_REP_ACK. */ - return nbd_send_rep(csock, NBD_REP_ACK, NBD_OPT_LIST); + return nbd_negotiate_send_rep(csock, NBD_REP_ACK, NBD_OPT_LIST); } -static int nbd_handle_export_name(NBDClient *client, uint32_t length) +static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) { int rc = -EINVAL, csock = client->sock; char name[256]; @@ -242,7 +274,7 @@ static int nbd_handle_export_name(NBDClient *client, uint32_t length) LOG("Bad length received"); goto fail; } - if (read_sync(csock, name, length) != length) { + if (nbd_negotiate_read(csock, name, length) != length) { LOG("read failed"); goto fail; } @@ -261,7 +293,7 @@ fail: return rc; } -static int nbd_receive_options(NBDClient *client) +static int nbd_negotiate_options(NBDClient *client) { int csock = client->sock; uint32_t flags; @@ -280,7 +312,7 @@ static int nbd_receive_options(NBDClient *client) ... Rest of request */ - if (read_sync(csock, &flags, sizeof(flags)) != sizeof(flags)) { + if (nbd_negotiate_read(csock, &flags, sizeof(flags)) != sizeof(flags)) { LOG("read failed"); return -EIO; } @@ -296,7 +328,7 @@ static int nbd_receive_options(NBDClient *client) uint32_t tmp, length; uint64_t magic; - if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) { + if (nbd_negotiate_read(csock, &magic, sizeof(magic)) != sizeof(magic)) { LOG("read failed"); return -EINVAL; } @@ -306,12 +338,13 @@ static int nbd_receive_options(NBDClient *client) return -EINVAL; } - if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { + if (nbd_negotiate_read(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) { LOG("read failed"); return -EINVAL; } - if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) { + if (nbd_negotiate_read(csock, &length, + sizeof(length)) != sizeof(length)) { LOG("read failed"); return -EINVAL; } @@ -320,7 +353,7 @@ static int nbd_receive_options(NBDClient *client) TRACE("Checking option"); switch (be32_to_cpu(tmp)) { case NBD_OPT_LIST: - ret = nbd_handle_list(client, length); + ret = nbd_negotiate_handle_list(client, length); if (ret < 0) { return ret; } @@ -330,19 +363,25 @@ static int nbd_receive_options(NBDClient *client) return -EINVAL; case NBD_OPT_EXPORT_NAME: - return nbd_handle_export_name(client, length); + return nbd_negotiate_handle_export_name(client, length); default: tmp = be32_to_cpu(tmp); LOG("Unsupported option 0x%x", tmp); - nbd_send_rep(client->sock, NBD_REP_ERR_UNSUP, tmp); + nbd_negotiate_send_rep(client->sock, NBD_REP_ERR_UNSUP, tmp); return -EINVAL; } } } -static int nbd_send_negotiate(NBDClient *client) +typedef struct { + NBDClient *client; + Coroutine *co; +} NBDClientNewData; + +static coroutine_fn int nbd_negotiate(NBDClientNewData *data) { + NBDClient *client = data->client; int csock = client->sock; char buf[8 + 8 + 8 + 128]; int rc; @@ -368,7 +407,6 @@ static int nbd_send_negotiate(NBDClient *client) [28 .. 151] reserved (0) */ - qemu_set_block(csock); rc = -EINVAL; TRACE("Beginning negotiation."); @@ -385,16 +423,16 @@ static int nbd_send_negotiate(NBDClient *client) } if (client->exp) { - if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) { + if (nbd_negotiate_write(csock, buf, sizeof(buf)) != sizeof(buf)) { LOG("write failed"); goto fail; } } else { - if (write_sync(csock, buf, 18) != 18) { + if (nbd_negotiate_write(csock, buf, 18) != 18) { LOG("write failed"); goto fail; } - rc = nbd_receive_options(client); + rc = nbd_negotiate_options(client); if (rc != 0) { LOG("option negotiation failed"); goto fail; @@ -403,7 +441,8 @@ static int nbd_send_negotiate(NBDClient *client) assert ((client->exp->nbdflags & ~65535) == 0); cpu_to_be64w((uint64_t*)(buf + 18), client->exp->size); cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags); - if (write_sync(csock, buf + 18, sizeof(buf) - 18) != sizeof(buf) - 18) { + if (nbd_negotiate_write(csock, buf + 18, + sizeof(buf) - 18) != sizeof(buf) - 18) { LOG("write failed"); goto fail; } @@ -412,7 +451,6 @@ static int nbd_send_negotiate(NBDClient *client) TRACE("Negotiation succeeded."); rc = 0; fail: - qemu_set_nonblock(csock); return rc; } @@ -1028,25 +1066,43 @@ static void nbd_update_can_read(NBDClient *client) } } -void nbd_client_new(NBDExport *exp, int csock, void (*close_fn)(NBDClient *)) +static coroutine_fn void nbd_co_client_start(void *opaque) { - NBDClient *client; - client = g_malloc0(sizeof(NBDClient)); - client->refcount = 1; - client->exp = exp; - client->sock = csock; - client->can_read = true; - if (nbd_send_negotiate(client)) { - shutdown(client->sock, 2); - close_fn(client); - return; + NBDClientNewData *data = opaque; + NBDClient *client = data->client; + NBDExport *exp = client->exp; + + if (exp) { + nbd_export_get(exp); + } + if (nbd_negotiate(data)) { + shutdown(client->sock, 2); + client->close(client); + goto out; } - client->close = close_fn; qemu_co_mutex_init(&client->send_lock); nbd_set_handlers(client); if (exp) { QTAILQ_INSERT_TAIL(&exp->clients, client, next); - nbd_export_get(exp); } +out: + g_free(data); +} + +void nbd_client_new(NBDExport *exp, int csock, void (*close_fn)(NBDClient *)) +{ + NBDClient *client; + NBDClientNewData *data = g_new(NBDClientNewData, 1); + + client = g_malloc0(sizeof(NBDClient)); + client->refcount = 1; + client->exp = exp; + client->sock = csock; + client->can_read = true; + client->close = close_fn; + + data->client = client; + data->co = qemu_coroutine_create(nbd_co_client_start); + qemu_coroutine_enter(data->co, data); } From eb38c3b67018ff8069e4f674a28661931a8a3e4f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 14:32:42 +0100 Subject: [PATCH 12/15] nbd-server: do not check request length except for reads and writes Only reads and writes need to allocate memory correspondent to the request length. Other requests can be sent to the storage without allocating any memory, and thus any request length is acceptable. Reported-by: Sitsofe Wheeler Cc: qemu-block@nongnu.org Reviewed-by: Max Reitz Signed-off-by: Paolo Bonzini --- nbd/server.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 8752885509..c41af0debe 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -818,13 +818,6 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque goto out; } - if (request->len > NBD_MAX_BUFFER_SIZE) { - LOG("len (%u) is larger than max len (%u)", - request->len, NBD_MAX_BUFFER_SIZE); - rc = -EINVAL; - goto out; - } - if ((request->from + request->len) < request->from) { LOG("integer overflow detected! " "you're probably being attacked"); @@ -836,6 +829,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque command = request->type & NBD_CMD_MASK_COMMAND; if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { + if (request->len > NBD_MAX_BUFFER_SIZE) { + LOG("len (%u) is larger than max len (%u)", + request->len, NBD_MAX_BUFFER_SIZE); + rc = -EINVAL; + goto out; + } + req->data = blk_blockalign(client->exp->blk, request->len); } if (command == NBD_CMD_WRITE) { From f1c17521e79df863a5771d96974fab0d07f02be0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 14:34:13 +0100 Subject: [PATCH 13/15] nbd-server: do not exit on failed memory allocation The amount of memory allocated in nbd_co_receive_request is driven by the NBD client (possibly a virtual machine). Parallel I/O can cause the server to allocate a large amount of memory; check for failures and return ENOMEM in that case. Cc: qemu-block@nongnu.org Reviewed-by: Max Reitz Signed-off-by: Paolo Bonzini --- block/block-backend.c | 5 +++++ include/sysemu/block-backend.h | 1 + nbd/server.c | 6 +++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/block/block-backend.c b/block/block-backend.c index f41d326b3c..e81375955f 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1033,6 +1033,11 @@ void blk_set_guest_block_size(BlockBackend *blk, int align) blk->guest_block_size = align; } +void *blk_try_blockalign(BlockBackend *blk, size_t size) +{ + return qemu_try_blockalign(blk ? blk->bs : NULL, size); +} + void *blk_blockalign(BlockBackend *blk, size_t size) { return qemu_blockalign(blk ? blk->bs : NULL, size); diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index dc244760a1..1568554e3e 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -148,6 +148,7 @@ int blk_get_flags(BlockBackend *blk); int blk_get_max_transfer_length(BlockBackend *blk); int blk_get_max_iov(BlockBackend *blk); void blk_set_guest_block_size(BlockBackend *blk, int align); +void *blk_try_blockalign(BlockBackend *blk, size_t size); void *blk_blockalign(BlockBackend *blk, size_t size); bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp); void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason); diff --git a/nbd/server.c b/nbd/server.c index c41af0debe..eead339a2c 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -836,7 +836,11 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *reque goto out; } - req->data = blk_blockalign(client->exp->blk, request->len); + req->data = blk_try_blockalign(client->exp->blk, request->len); + if (req->data == NULL) { + rc = -ENOMEM; + goto out; + } } if (command == NBD_CMD_WRITE) { TRACE("Reading %u byte(s)", request->len); From d0d7708ba29cbcc343364a46bff981e0ff88366f Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 11 Jan 2016 12:44:41 +0000 Subject: [PATCH 14/15] qemu-char: add logfile facility to all chardev backends Typically a UNIX guest OS will log boot messages to a serial port in addition to any graphical console. An admin user may also wish to use the serial port for an interactive console. A virtualization management system may wish to collect system boot messages by logging the serial port, but also wish to allow admins interactive access. Currently providing such a feature forces the mgmt app to either provide 2 separate serial ports, one for logging boot messages and one for interactive console login, or to proxy all output via a separate service that can multiplex the two needs onto one serial port. While both are valid approaches, they each have their own downsides. The former causes confusion and extra setup work for VM admins creating disk images. The latter places an extra burden to re-implement much of the QEMU chardev backends logic in libvirt or even higher level mgmt apps and adds extra hops in the data transfer path. A simpler approach that is satisfactory for many use cases is to allow the QEMU chardev backends to have a "logfile" property associated with them. $QEMU -chardev socket,host=localhost,port=9000,\ server=on,nowait,id-charserial0,\ logfile=/var/log/libvirt/qemu/test-serial0.log -device isa-serial,chardev=charserial0,id=serial0 This patch introduces a 'ChardevCommon' struct which is setup as a base for all the ChardevBackend types. Ideally this would be registered directly as a base against ChardevBackend, rather than each type, but the QAPI generator doesn't allow that since the ChardevBackend is a non-discriminated union. The ChardevCommon struct provides the optional 'logfile' parameter, as well as 'logappend' which controls whether QEMU truncates or appends (default truncate). Signed-off-by: Daniel P. Berrange Message-Id: <1452516281-27519-1-git-send-email-berrange@redhat.com> [Call qemu_chr_parse_common if cd->parse is NULL. - Paolo] Signed-off-by: Paolo Bonzini --- backends/baum.c | 7 +- backends/msmouse.c | 6 +- gdbstub.c | 3 +- include/sysemu/char.h | 9 +- qapi-schema.json | 49 +++++++-- qemu-char.c | 248 +++++++++++++++++++++++++++++++++++------- qemu-options.hx | 48 ++++---- spice-qemu-char.c | 20 +++- ui/console.c | 6 +- 9 files changed, 313 insertions(+), 83 deletions(-) diff --git a/backends/baum.c b/backends/baum.c index 723c658ac0..ba32b61002 100644 --- a/backends/baum.c +++ b/backends/baum.c @@ -566,6 +566,7 @@ static CharDriverState *chr_baum_init(const char *id, ChardevReturn *ret, Error **errp) { + ChardevCommon *common = qapi_ChardevDummy_base(backend->u.braille); BaumDriverState *baum; CharDriverState *chr; brlapi_handle_t *handle; @@ -576,8 +577,12 @@ static CharDriverState *chr_baum_init(const char *id, #endif int tty; + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } baum = g_malloc0(sizeof(BaumDriverState)); - baum->chr = chr = qemu_chr_alloc(); + baum->chr = chr; chr->opaque = baum; chr->chr_write = baum_write; diff --git a/backends/msmouse.c b/backends/msmouse.c index 0126fa0b13..476dab5634 100644 --- a/backends/msmouse.c +++ b/backends/msmouse.c @@ -68,9 +68,13 @@ static CharDriverState *qemu_chr_open_msmouse(const char *id, ChardevReturn *ret, Error **errp) { + ChardevCommon *common = qapi_ChardevDummy_base(backend->u.msmouse); CharDriverState *chr; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } chr->chr_write = msmouse_chr_write; chr->chr_close = msmouse_chr_close; chr->explicit_be_open = true; diff --git a/gdbstub.c b/gdbstub.c index 9c29aa0e87..1a84c1a746 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -1732,6 +1732,7 @@ int gdbserver_start(const char *device) char gdbstub_device_name[128]; CharDriverState *chr = NULL; CharDriverState *mon_chr; + ChardevCommon common = { 0 }; if (!device) return -1; @@ -1768,7 +1769,7 @@ int gdbserver_start(const char *device) qemu_add_vm_change_state_handler(gdb_vm_state_change, NULL); /* Initialize a monitor terminal for gdb */ - mon_chr = qemu_chr_alloc(); + mon_chr = qemu_chr_alloc(&common, &error_abort); mon_chr->chr_write = gdb_monitor_write; monitor_init(mon_chr, 0); } else { diff --git a/include/sysemu/char.h b/include/sysemu/char.h index aff193f080..598dd2bf49 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -77,6 +77,7 @@ struct CharDriverState { void *opaque; char *label; char *filename; + int logfd; int be_open; int fe_open; int explicit_fe_open; @@ -89,13 +90,15 @@ struct CharDriverState { }; /** - * @qemu_chr_alloc: + * qemu_chr_alloc: + * @backend: the common backend config + * @errp: pointer to a NULL-initialized error object * * Allocate and initialize a new CharDriverState. * - * Returns: a newly allocated CharDriverState. + * Returns: a newly allocated CharDriverState, or NULL on error. */ -CharDriverState *qemu_chr_alloc(void); +CharDriverState *qemu_chr_alloc(ChardevCommon *backend, Error **errp); /** * @qemu_chr_new_from_opts: diff --git a/qapi-schema.json b/qapi-schema.json index 0c75465031..b3038b215a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3093,6 +3093,21 @@ ## { 'command': 'screendump', 'data': {'filename': 'str'} } + +## +# @ChardevCommon: +# +# Configuration shared across all chardev backends +# +# @logfile: #optional The name of a logfile to save output +# @logappend: #optional true to append instead of truncate +# (default to false to truncate) +# +# Since: 2.6 +## +{ 'struct': 'ChardevCommon', 'data': { '*logfile': 'str', + '*logappend': 'bool' } } + ## # @ChardevFile: # @@ -3107,7 +3122,8 @@ ## { 'struct': 'ChardevFile', 'data': { '*in' : 'str', 'out' : 'str', - '*append': 'bool' } } + '*append': 'bool' }, + 'base': 'ChardevCommon' } ## # @ChardevHostdev: @@ -3120,7 +3136,8 @@ # # Since: 1.4 ## -{ 'struct': 'ChardevHostdev', 'data': { 'device' : 'str' } } +{ 'struct': 'ChardevHostdev', 'data': { 'device' : 'str' }, + 'base': 'ChardevCommon' } ## # @ChardevSocket: @@ -3147,7 +3164,8 @@ '*wait' : 'bool', '*nodelay' : 'bool', '*telnet' : 'bool', - '*reconnect' : 'int' } } + '*reconnect' : 'int' }, + 'base': 'ChardevCommon' } ## # @ChardevUdp: @@ -3160,7 +3178,8 @@ # Since: 1.5 ## { 'struct': 'ChardevUdp', 'data': { 'remote' : 'SocketAddress', - '*local' : 'SocketAddress' } } + '*local' : 'SocketAddress' }, + 'base': 'ChardevCommon' } ## # @ChardevMux: @@ -3171,7 +3190,8 @@ # # Since: 1.5 ## -{ 'struct': 'ChardevMux', 'data': { 'chardev' : 'str' } } +{ 'struct': 'ChardevMux', 'data': { 'chardev' : 'str' }, + 'base': 'ChardevCommon' } ## # @ChardevStdio: @@ -3184,7 +3204,9 @@ # # Since: 1.5 ## -{ 'struct': 'ChardevStdio', 'data': { '*signal' : 'bool' } } +{ 'struct': 'ChardevStdio', 'data': { '*signal' : 'bool' }, + 'base': 'ChardevCommon' } + ## # @ChardevSpiceChannel: @@ -3195,7 +3217,8 @@ # # Since: 1.5 ## -{ 'struct': 'ChardevSpiceChannel', 'data': { 'type' : 'str' } } +{ 'struct': 'ChardevSpiceChannel', 'data': { 'type' : 'str' }, + 'base': 'ChardevCommon' } ## # @ChardevSpicePort: @@ -3206,7 +3229,8 @@ # # Since: 1.5 ## -{ 'struct': 'ChardevSpicePort', 'data': { 'fqdn' : 'str' } } +{ 'struct': 'ChardevSpicePort', 'data': { 'fqdn' : 'str' }, + 'base': 'ChardevCommon' } ## # @ChardevVC: @@ -3223,7 +3247,8 @@ { 'struct': 'ChardevVC', 'data': { '*width' : 'int', '*height' : 'int', '*cols' : 'int', - '*rows' : 'int' } } + '*rows' : 'int' }, + 'base': 'ChardevCommon' } ## # @ChardevRingbuf: @@ -3234,7 +3259,8 @@ # # Since: 1.5 ## -{ 'struct': 'ChardevRingbuf', 'data': { '*size' : 'int' } } +{ 'struct': 'ChardevRingbuf', 'data': { '*size' : 'int' }, + 'base': 'ChardevCommon' } ## # @ChardevBackend: @@ -3243,7 +3269,8 @@ # # Since: 1.4 (testdev since 2.2) ## -{ 'struct': 'ChardevDummy', 'data': { } } +{ 'struct': 'ChardevDummy', 'data': { }, + 'base': 'ChardevCommon' } { 'union': 'ChardevBackend', 'data': { 'file' : 'ChardevFile', 'serial' : 'ChardevHostdev', diff --git a/qemu-char.c b/qemu-char.c index d7be1851e5..11caa5648d 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -159,10 +159,33 @@ static int sockaddr_to_str(char *dest, int max_len, static QTAILQ_HEAD(CharDriverStateHead, CharDriverState) chardevs = QTAILQ_HEAD_INITIALIZER(chardevs); -CharDriverState *qemu_chr_alloc(void) +static void qemu_chr_free_common(CharDriverState *chr); + +CharDriverState *qemu_chr_alloc(ChardevCommon *backend, Error **errp) { CharDriverState *chr = g_malloc0(sizeof(CharDriverState)); qemu_mutex_init(&chr->chr_write_lock); + + if (backend->has_logfile) { + int flags = O_WRONLY | O_CREAT; + if (backend->has_logappend && + backend->logappend) { + flags |= O_APPEND; + } else { + flags |= O_TRUNC; + } + chr->logfd = qemu_open(backend->logfile, flags, 0666); + if (chr->logfd < 0) { + error_setg_errno(errp, errno, + "Unable to open logfile %s", + backend->logfile); + g_free(chr); + return NULL; + } + } else { + chr->logfd = -1; + } + return chr; } @@ -188,12 +211,45 @@ void qemu_chr_be_generic_open(CharDriverState *s) qemu_chr_be_event(s, CHR_EVENT_OPENED); } + +/* Not reporting errors from writing to logfile, as logs are + * defined to be "best effort" only */ +static void qemu_chr_fe_write_log(CharDriverState *s, + const uint8_t *buf, size_t len) +{ + size_t done = 0; + ssize_t ret; + + if (s->logfd < 0) { + return; + } + + while (done < len) { + do { + ret = write(s->logfd, buf + done, len - done); + if (ret == -1 && errno == EAGAIN) { + g_usleep(100); + } + } while (ret == -1 && errno == EAGAIN); + + if (ret <= 0) { + return; + } + done += ret; + } +} + int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len) { int ret; qemu_mutex_lock(&s->chr_write_lock); ret = s->chr_write(s, buf, len); + + if (ret > 0) { + qemu_chr_fe_write_log(s, buf, ret); + } + qemu_mutex_unlock(&s->chr_write_lock); return ret; } @@ -218,6 +274,10 @@ int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len) offset += res; } + if (offset > 0) { + qemu_chr_fe_write_log(s, buf, offset); + } + qemu_mutex_unlock(&s->chr_write_lock); if (res < 0) { @@ -365,8 +425,12 @@ static CharDriverState *qemu_chr_open_null(const char *id, Error **errp) { CharDriverState *chr; + ChardevCommon *common = qapi_ChardevDummy_base(backend->u.null); - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } chr->chr_write = null_chr_write; chr->explicit_be_open = true; return chr; @@ -665,6 +729,7 @@ static CharDriverState *qemu_chr_open_mux(const char *id, ChardevMux *mux = backend->u.mux; CharDriverState *chr, *drv; MuxDriver *d; + ChardevCommon *common = qapi_ChardevMux_base(backend->u.mux); drv = qemu_chr_find(mux->chardev); if (drv == NULL) { @@ -672,7 +737,10 @@ static CharDriverState *qemu_chr_open_mux(const char *id, return NULL; } - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } d = g_new0(MuxDriver, 1); chr->opaque = d; @@ -975,12 +1043,16 @@ static void fd_chr_close(struct CharDriverState *chr) } /* open a character device to a unix fd */ -static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out) +static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out, + ChardevCommon *backend, Error **errp) { CharDriverState *chr; FDCharDriver *s; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(backend, errp); + if (!chr) { + return NULL; + } s = g_new0(FDCharDriver, 1); s->fd_in = io_channel_from_fd(fd_in); s->fd_out = io_channel_from_fd(fd_out); @@ -1005,6 +1077,7 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, char filename_in[CHR_MAX_FILENAME_SIZE]; char filename_out[CHR_MAX_FILENAME_SIZE]; const char *filename = opts->device; + ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.pipe); snprintf(filename_in, CHR_MAX_FILENAME_SIZE, "%s.in", filename); snprintf(filename_out, CHR_MAX_FILENAME_SIZE, "%s.out", filename); @@ -1021,7 +1094,7 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, return NULL; } } - return qemu_chr_open_fd(fd_in, fd_out); + return qemu_chr_open_fd(fd_in, fd_out, common, errp); } /* init terminal so that we can grab keys */ @@ -1081,6 +1154,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, ChardevStdio *opts = backend->u.stdio; CharDriverState *chr; struct sigaction act; + ChardevCommon *common = qapi_ChardevStdio_base(backend->u.stdio); if (is_daemonized()) { error_setg(errp, "cannot use stdio with -daemonize"); @@ -1102,7 +1176,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, act.sa_handler = term_stdio_handler; sigaction(SIGCONT, &act, NULL); - chr = qemu_chr_open_fd(0, 1); + chr = qemu_chr_open_fd(0, 1, common, errp); chr->chr_close = qemu_chr_close_stdio; chr->chr_set_echo = qemu_chr_set_echo_stdio; if (opts->has_signal) { @@ -1324,6 +1398,7 @@ static CharDriverState *qemu_chr_open_pty(const char *id, PtyCharDriver *s; int master_fd, slave_fd; char pty_name[PATH_MAX]; + ChardevCommon *common = qapi_ChardevDummy_base(backend->u.pty); master_fd = qemu_openpty_raw(&slave_fd, pty_name); if (master_fd < 0) { @@ -1334,7 +1409,11 @@ static CharDriverState *qemu_chr_open_pty(const char *id, close(slave_fd); qemu_set_nonblock(master_fd); - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + close(master_fd); + return NULL; + } chr->filename = g_strdup_printf("pty:%s", pty_name); ret->pty = g_strdup(pty_name); @@ -1557,12 +1636,14 @@ static void qemu_chr_close_tty(CharDriverState *chr) } } -static CharDriverState *qemu_chr_open_tty_fd(int fd) +static CharDriverState *qemu_chr_open_tty_fd(int fd, + ChardevCommon *backend, + Error **errp) { CharDriverState *chr; tty_serial_init(fd, 115200, 'N', 8, 1); - chr = qemu_chr_open_fd(fd, fd); + chr = qemu_chr_open_fd(fd, fd, backend, errp); chr->chr_ioctl = tty_serial_ioctl; chr->chr_close = qemu_chr_close_tty; return chr; @@ -1682,7 +1763,9 @@ static void pp_close(CharDriverState *chr) qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static CharDriverState *qemu_chr_open_pp_fd(int fd, Error **errp) +static CharDriverState *qemu_chr_open_pp_fd(int fd, + ChardevCommon *backend, + Error **errp) { CharDriverState *chr; ParallelCharDriver *drv; @@ -1697,7 +1780,10 @@ static CharDriverState *qemu_chr_open_pp_fd(int fd, Error **errp) drv->fd = fd; drv->mode = IEEE1284_MODE_COMPAT; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(backend, errp); + if (!chr) { + return NULL; + } chr->chr_write = null_chr_write; chr->chr_ioctl = pp_ioctl; chr->chr_close = pp_close; @@ -1748,11 +1834,16 @@ static int pp_ioctl(CharDriverState *chr, int cmd, void *arg) return 0; } -static CharDriverState *qemu_chr_open_pp_fd(int fd, Error **errp) +static CharDriverState *qemu_chr_open_pp_fd(int fd, + ChardevBackend *backend, + Error **errp) { CharDriverState *chr; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } chr->opaque = (void *)(intptr_t)fd; chr->chr_write = null_chr_write; chr->chr_ioctl = pp_ioctl; @@ -1978,12 +2069,16 @@ static int win_chr_poll(void *opaque) } static CharDriverState *qemu_chr_open_win_path(const char *filename, + ChardevCommon *backend, Error **errp) { CharDriverState *chr; WinCharState *s; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(backend, errp); + if (!chr) { + return NULL; + } s = g_new0(WinCharState, 1); chr->opaque = s; chr->chr_write = win_chr_write; @@ -1991,7 +2086,7 @@ static CharDriverState *qemu_chr_open_win_path(const char *filename, if (win_chr_init(chr, filename, errp) < 0) { g_free(s); - g_free(chr); + qemu_chr_free_common(chr); return NULL; } return chr; @@ -2086,8 +2181,12 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, const char *filename = opts->device; CharDriverState *chr; WinCharState *s; + ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.pipe); - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } s = g_new0(WinCharState, 1); chr->opaque = s; chr->chr_write = win_chr_write; @@ -2095,18 +2194,23 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, if (win_chr_pipe_init(chr, filename, errp) < 0) { g_free(s); - g_free(chr); + qemu_chr_free_common(chr); return NULL; } return chr; } -static CharDriverState *qemu_chr_open_win_file(HANDLE fd_out) +static CharDriverState *qemu_chr_open_win_file(HANDLE fd_out, + ChardevCommon *backend, + Error **errp) { CharDriverState *chr; WinCharState *s; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(backend, errp); + if (!chr) { + return NULL; + } s = g_new0(WinCharState, 1); s->hcom = fd_out; chr->opaque = s; @@ -2119,7 +2223,9 @@ static CharDriverState *qemu_chr_open_win_con(const char *id, ChardevReturn *ret, Error **errp) { - return qemu_chr_open_win_file(GetStdHandle(STD_OUTPUT_HANDLE)); + ChardevCommon *common = qapi_ChardevDummy_base(backend->u.console); + return qemu_chr_open_win_file(GetStdHandle(STD_OUTPUT_HANDLE), + common, errp); } static int win_stdio_write(CharDriverState *chr, const uint8_t *buf, int len) @@ -2267,8 +2373,12 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, WinStdioCharState *stdio; DWORD dwMode; int is_console = 0; + ChardevCommon *common = qapi_ChardevStdio_base(backend->u.stdio); - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } stdio = g_new0(WinStdioCharState, 1); stdio->hStdIn = GetStdHandle(STD_INPUT_HANDLE); @@ -2440,12 +2550,17 @@ static void udp_chr_close(CharDriverState *chr) qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static CharDriverState *qemu_chr_open_udp_fd(int fd) +static CharDriverState *qemu_chr_open_udp_fd(int fd, + ChardevCommon *backend, + Error **errp) { CharDriverState *chr = NULL; NetCharDriver *s = NULL; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(backend, errp); + if (!chr) { + return NULL; + } s = g_new0(NetCharDriver, 1); s->fd = fd; @@ -2856,7 +2971,7 @@ static int tcp_chr_sync_read(CharDriverState *chr, const uint8_t *buf, int len) #ifndef _WIN32 CharDriverState *qemu_chr_open_eventfd(int eventfd) { - CharDriverState *chr = qemu_chr_open_fd(eventfd, eventfd); + CharDriverState *chr = qemu_chr_open_fd(eventfd, eventfd, NULL, NULL); if (chr) { chr->avail_connections = 1; @@ -3138,10 +3253,14 @@ static CharDriverState *qemu_chr_open_ringbuf(const char *id, Error **errp) { ChardevRingbuf *opts = backend->u.ringbuf; + ChardevCommon *common = qapi_ChardevRingbuf_base(backend->u.ringbuf); CharDriverState *chr; RingBufCharDriver *d; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } d = g_malloc(sizeof(*d)); d->size = opts->has_size ? opts->size : 65536; @@ -3164,7 +3283,7 @@ static CharDriverState *qemu_chr_open_ringbuf(const char *id, fail: g_free(d); - g_free(chr); + qemu_chr_free_common(chr); return NULL; } @@ -3408,6 +3527,18 @@ fail: return NULL; } +static void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend) +{ + const char *logfile = qemu_opt_get(opts, "logfile"); + + backend->has_logfile = logfile != NULL; + backend->logfile = logfile ? g_strdup(logfile) : NULL; + + backend->has_logappend = true; + backend->logappend = qemu_opt_get_bool(opts, "logappend", false); +} + + static void qemu_chr_parse_file_out(QemuOpts *opts, ChardevBackend *backend, Error **errp) { @@ -3418,6 +3549,7 @@ static void qemu_chr_parse_file_out(QemuOpts *opts, ChardevBackend *backend, return; } backend->u.file = g_new0(ChardevFile, 1); + qemu_chr_parse_common(opts, qapi_ChardevFile_base(backend->u.file)); backend->u.file->out = g_strdup(path); backend->u.file->has_append = true; @@ -3428,6 +3560,7 @@ static void qemu_chr_parse_stdio(QemuOpts *opts, ChardevBackend *backend, Error **errp) { backend->u.stdio = g_new0(ChardevStdio, 1); + qemu_chr_parse_common(opts, qapi_ChardevStdio_base(backend->u.stdio)); backend->u.stdio->has_signal = true; backend->u.stdio->signal = qemu_opt_get_bool(opts, "signal", true); } @@ -3443,6 +3576,7 @@ static void qemu_chr_parse_serial(QemuOpts *opts, ChardevBackend *backend, return; } backend->u.serial = g_new0(ChardevHostdev, 1); + qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(backend->u.serial)); backend->u.serial->device = g_strdup(device); } #endif @@ -3458,6 +3592,7 @@ static void qemu_chr_parse_parallel(QemuOpts *opts, ChardevBackend *backend, return; } backend->u.parallel = g_new0(ChardevHostdev, 1); + qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(backend->u.parallel)); backend->u.parallel->device = g_strdup(device); } #endif @@ -3472,6 +3607,7 @@ static void qemu_chr_parse_pipe(QemuOpts *opts, ChardevBackend *backend, return; } backend->u.pipe = g_new0(ChardevHostdev, 1); + qemu_chr_parse_common(opts, qapi_ChardevHostdev_base(backend->u.pipe)); backend->u.pipe->device = g_strdup(device); } @@ -3481,6 +3617,7 @@ static void qemu_chr_parse_ringbuf(QemuOpts *opts, ChardevBackend *backend, int val; backend->u.ringbuf = g_new0(ChardevRingbuf, 1); + qemu_chr_parse_common(opts, qapi_ChardevRingbuf_base(backend->u.ringbuf)); val = qemu_opt_get_size(opts, "size", 0); if (val != 0) { @@ -3499,6 +3636,7 @@ static void qemu_chr_parse_mux(QemuOpts *opts, ChardevBackend *backend, return; } backend->u.mux = g_new0(ChardevMux, 1); + qemu_chr_parse_common(opts, qapi_ChardevMux_base(backend->u.mux)); backend->u.mux->chardev = g_strdup(chardev); } @@ -3527,6 +3665,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, } backend->u.socket = g_new0(ChardevSocket, 1); + qemu_chr_parse_common(opts, qapi_ChardevSocket_base(backend->u.socket)); backend->u.socket->has_nodelay = true; backend->u.socket->nodelay = do_nodelay; @@ -3588,6 +3727,7 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, } backend->u.udp = g_new0(ChardevUdp, 1); + qemu_chr_parse_common(opts, qapi_ChardevUdp_base(backend->u.udp)); addr = g_new0(SocketAddress, 1); addr->type = SOCKET_ADDRESS_KIND_INET; @@ -3687,7 +3827,12 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, error_propagate(errp, local_err); goto qapi_out; } + } else { + ChardevCommon *cc = g_new0(ChardevCommon, 1); + qemu_chr_parse_common(opts, cc); + backend->u.data = cc; } + ret = qmp_chardev_add(bid ? bid : id, backend, errp); if (!ret) { goto qapi_out; @@ -3819,15 +3964,23 @@ void qemu_chr_fe_release(CharDriverState *s) s->avail_connections++; } +static void qemu_chr_free_common(CharDriverState *chr) +{ + g_free(chr->filename); + g_free(chr->label); + qemu_opts_del(chr->opts); + if (chr->logfd != -1) { + close(chr->logfd); + } + g_free(chr); +} + void qemu_chr_free(CharDriverState *chr) { if (chr->chr_close) { chr->chr_close(chr); } - g_free(chr->filename); - g_free(chr->label); - qemu_opts_del(chr->opts); - g_free(chr); + qemu_chr_free_common(chr); } void qemu_chr_delete(CharDriverState *chr) @@ -3982,6 +4135,12 @@ QemuOptsList qemu_chardev_opts = { },{ .name = "append", .type = QEMU_OPT_BOOL, + },{ + .name = "logfile", + .type = QEMU_OPT_STRING, + },{ + .name = "logappend", + .type = QEMU_OPT_BOOL, }, { /* end of list */ } }, @@ -3995,6 +4154,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, Error **errp) { ChardevFile *file = backend->u.file; + ChardevCommon *common = qapi_ChardevFile_base(backend->u.file); HANDLE out; if (file->has_in) { @@ -4008,7 +4168,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, error_setg(errp, "open %s failed", file->out); return NULL; } - return qemu_chr_open_win_file(out); + return qemu_chr_open_win_file(out, common, errp); } static CharDriverState *qmp_chardev_open_serial(const char *id, @@ -4017,7 +4177,8 @@ static CharDriverState *qmp_chardev_open_serial(const char *id, Error **errp) { ChardevHostdev *serial = backend->u.serial; - return qemu_chr_open_win_path(serial->device, errp); + ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.serial); + return qemu_chr_open_win_path(serial->device, common, errp); } #else /* WIN32 */ @@ -4040,6 +4201,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, Error **errp) { ChardevFile *file = backend->u.file; + ChardevCommon *common = qapi_ChardevFile_base(backend->u.file); int flags, in = -1, out; flags = O_WRONLY | O_CREAT | O_BINARY; @@ -4063,7 +4225,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, } } - return qemu_chr_open_fd(in, out); + return qemu_chr_open_fd(in, out, common, errp); } #ifdef HAVE_CHARDEV_SERIAL @@ -4073,6 +4235,7 @@ static CharDriverState *qmp_chardev_open_serial(const char *id, Error **errp) { ChardevHostdev *serial = backend->u.serial; + ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.serial); int fd; fd = qmp_chardev_open_file_source(serial->device, O_RDWR, errp); @@ -4080,7 +4243,7 @@ static CharDriverState *qmp_chardev_open_serial(const char *id, return NULL; } qemu_set_nonblock(fd); - return qemu_chr_open_tty_fd(fd); + return qemu_chr_open_tty_fd(fd, common, errp); } #endif @@ -4091,13 +4254,14 @@ static CharDriverState *qmp_chardev_open_parallel(const char *id, Error **errp) { ChardevHostdev *parallel = backend->u.parallel; + ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.parallel); int fd; fd = qmp_chardev_open_file_source(parallel->device, O_RDWR, errp); if (fd < 0) { return NULL; } - return qemu_chr_open_pp_fd(fd, errp); + return qemu_chr_open_pp_fd(fd, common, errp); } #endif @@ -4142,8 +4306,12 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, bool is_telnet = sock->has_telnet ? sock->telnet : false; bool is_waitconnect = sock->has_wait ? sock->wait : false; int64_t reconnect = sock->has_reconnect ? sock->reconnect : 0; + ChardevCommon *common = qapi_ChardevSocket_base(backend->u.socket); - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } s = g_new0(TCPCharDriver, 1); s->fd = -1; @@ -4182,8 +4350,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, socket_try_connect(chr); } else if (!qemu_chr_open_socket_fd(chr, errp)) { g_free(s); - g_free(chr->filename); - g_free(chr); + qemu_chr_free_common(chr); return NULL; } @@ -4203,13 +4370,14 @@ static CharDriverState *qmp_chardev_open_udp(const char *id, Error **errp) { ChardevUdp *udp = backend->u.udp; + ChardevCommon *common = qapi_ChardevUdp_base(backend->u.udp); int fd; fd = socket_dgram(udp->remote, udp->local, errp); if (fd < 0) { return NULL; } - return qemu_chr_open_udp_fd(fd); + return qemu_chr_open_udp_fd(fd, common, errp); } ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, diff --git a/qemu-options.hx b/qemu-options.hx index 215d00ddd3..b4763ba226 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2089,40 +2089,43 @@ The general form of a character device option is: ETEXI DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, - "-chardev null,id=id[,mux=on|off]\n" + "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n" - " [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off] (tcp)\n" - "-chardev socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off] (unix)\n" + " [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]\n" + " [,logfile=PATH][,logappend=on|off] (tcp)\n" + "-chardev socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds]\n" + " [,mux=on|off][,logfile=PATH][,logappend=on|off] (unix)\n" "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n" " [,localport=localport][,ipv4][,ipv6][,mux=on|off]\n" - "-chardev msmouse,id=id[,mux=on|off]\n" + " [,logfile=PATH][,logappend=on|off]\n" + "-chardev msmouse,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n" - " [,mux=on|off]\n" - "-chardev ringbuf,id=id[,size=size]\n" - "-chardev file,id=id,path=path[,mux=on|off]\n" - "-chardev pipe,id=id,path=path[,mux=on|off]\n" + " [,mux=on|off][,logfile=PATH][,logappend=on|off]\n" + "-chardev ringbuf,id=id[,size=size][,logfile=PATH][,logappend=on|off]\n" + "-chardev file,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" + "-chardev pipe,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" #ifdef _WIN32 - "-chardev console,id=id[,mux=on|off]\n" - "-chardev serial,id=id,path=path[,mux=on|off]\n" + "-chardev console,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" + "-chardev serial,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" #else - "-chardev pty,id=id[,mux=on|off]\n" - "-chardev stdio,id=id[,mux=on|off][,signal=on|off]\n" + "-chardev pty,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" + "-chardev stdio,id=id[,mux=on|off][,signal=on|off][,logfile=PATH][,logappend=on|off]\n" #endif #ifdef CONFIG_BRLAPI - "-chardev braille,id=id[,mux=on|off]\n" + "-chardev braille,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" #endif #if defined(__linux__) || defined(__sun__) || defined(__FreeBSD__) \ || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) - "-chardev serial,id=id,path=path[,mux=on|off]\n" - "-chardev tty,id=id,path=path[,mux=on|off]\n" + "-chardev serial,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" + "-chardev tty,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" #endif #if defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__) - "-chardev parallel,id=id,path=path[,mux=on|off]\n" - "-chardev parport,id=id,path=path[,mux=on|off]\n" + "-chardev parallel,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" + "-chardev parport,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" #endif #if defined(CONFIG_SPICE) - "-chardev spicevmc,id=id,name=name[,debug=debug]\n" - "-chardev spiceport,id=id,name=name[,debug=debug]\n" + "-chardev spicevmc,id=id,name=name[,debug=debug][,logfile=PATH][,logappend=on|off]\n" + "-chardev spiceport,id=id,name=name[,debug=debug][,logfile=PATH][,logappend=on|off]\n" #endif , QEMU_ARCH_ALL ) @@ -2158,7 +2161,12 @@ A character device may be used in multiplexing mode by multiple front-ends. The key sequence of @key{Control-a} and @key{c} will rotate the input focus between attached front-ends. Specify @option{mux=on} to enable this mode. -Options to each backend are described below. +Every backend supports the @option{logfile} option, which supplies the path +to a file to record all data transmitted via the backend. The @option{logappend} +option controls whether the log file will be truncated or appended to when +opened. + +Further options to each backend are described below. @item -chardev null ,id=@var{id} A void device. This device will not emit any data, and will drop any data it diff --git a/spice-qemu-char.c b/spice-qemu-char.c index e70e0f7366..8951d7ca37 100644 --- a/spice-qemu-char.c +++ b/spice-qemu-char.c @@ -271,13 +271,18 @@ static void spice_chr_accept_input(struct CharDriverState *chr) } static CharDriverState *chr_open(const char *subtype, - void (*set_fe_open)(struct CharDriverState *, int)) - + void (*set_fe_open)(struct CharDriverState *, + int), + ChardevCommon *backend, + Error **errp) { CharDriverState *chr; SpiceCharDriver *s; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(backend, errp); + if (!chr) { + return NULL; + } s = g_malloc0(sizeof(SpiceCharDriver)); s->chr = chr; s->active = false; @@ -303,6 +308,7 @@ static CharDriverState *qemu_chr_open_spice_vmc(const char *id, { const char *type = backend->u.spicevmc->type; const char **psubtype = spice_server_char_device_recognized_subtypes(); + ChardevCommon *common = qapi_ChardevSpiceChannel_base(backend->u.spicevmc); for (; *psubtype != NULL; ++psubtype) { if (strcmp(type, *psubtype) == 0) { @@ -315,7 +321,7 @@ static CharDriverState *qemu_chr_open_spice_vmc(const char *id, return NULL; } - return chr_open(type, spice_vmc_set_fe_open); + return chr_open(type, spice_vmc_set_fe_open, common, errp); } #if SPICE_SERVER_VERSION >= 0x000c02 @@ -325,6 +331,7 @@ static CharDriverState *qemu_chr_open_spice_port(const char *id, Error **errp) { const char *name = backend->u.spiceport->fqdn; + ChardevCommon *common = qapi_ChardevSpicePort_base(backend->u.spiceport); CharDriverState *chr; SpiceCharDriver *s; @@ -333,7 +340,10 @@ static CharDriverState *qemu_chr_open_spice_port(const char *id, return NULL; } - chr = chr_open("port", spice_port_set_fe_open); + chr = chr_open("port", spice_port_set_fe_open, common, errp); + if (!chr) { + return NULL; + } s = chr->opaque; s->sin.portname = g_strdup(name); diff --git a/ui/console.c b/ui/console.c index 4b65c34672..fe950c6026 100644 --- a/ui/console.c +++ b/ui/console.c @@ -1953,12 +1953,16 @@ static void text_console_do_init(CharDriverState *chr, DisplayState *ds) static CharDriverState *text_console_init(ChardevVC *vc, Error **errp) { + ChardevCommon *common = qapi_ChardevVC_base(vc); CharDriverState *chr; QemuConsole *s; unsigned width = 0; unsigned height = 0; - chr = qemu_chr_alloc(); + chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } if (vc->has_width) { width = vc->width; From fefd749ce29837d399a38d6052ca9968fa7352e7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 15 Jan 2016 16:16:25 +0100 Subject: [PATCH 15/15] qemu-char: do not leak QemuMutex when freeing a character device The leak is only apparent on Win32. On POSIX platforms destroying a mutex is not necessary. Reported-by: Eric Blake Reviewed-by: Daniel P. Berrange Signed-off-by: Paolo Bonzini --- qemu-char.c | 1 + 1 file changed, 1 insertion(+) diff --git a/qemu-char.c b/qemu-char.c index 11caa5648d..e133f4fc35 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -3972,6 +3972,7 @@ static void qemu_chr_free_common(CharDriverState *chr) if (chr->logfd != -1) { close(chr->logfd); } + qemu_mutex_destroy(&chr->chr_write_lock); g_free(chr); }