From ade0075523478fa015afd5c6f6cc70681687818d Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Tue, 12 Feb 2019 13:23:47 +0200 Subject: [PATCH 01/18] contrib/rdmacm-mux: Fix out-of-bounds risk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function get_fd extracts the context from the received MAD message and uses it as a key to fetch the destination fd from the mapping table. A context can be a dgid in the case of a CM request message or a comm_id in the case of a CM SIDR response message. When a MAD message is received with a smaller size than expected for its message type, we hit an out-of-bounds read, because we look for the context outside the message boundaries. Fix it by validating the message size. Reported-by: Sam Smith Signed-off-by: Yuval Shaia Message-Id: <20190212112347.1605-1-yuval.shaia@oracle.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Marcel Apfelbaum --- contrib/rdmacm-mux/main.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c index ae88c77a1e..21cc804367 100644 --- a/contrib/rdmacm-mux/main.c +++ b/contrib/rdmacm-mux/main.c @@ -300,7 +300,7 @@ static void hash_tbl_remove_fd_ifid_pair(int fd) pthread_rwlock_unlock(&server.lock); } -static int get_fd(const char *mad, int *fd, __be64 *gid_ifid) +static int get_fd(const char *mad, int umad_len, int *fd, __be64 *gid_ifid) { struct umad_hdr *hdr = (struct umad_hdr *)mad; char *data = (char *)hdr + sizeof(*hdr); @@ -308,13 +308,35 @@ static int get_fd(const char *mad, int *fd, __be64 *gid_ifid) uint16_t attr_id = be16toh(hdr->attr_id); int rc = 0; + if (umad_len <= sizeof(*hdr)) { + rc = -EINVAL; + syslog(LOG_DEBUG, "Ignoring MAD packets with header only\n"); + goto out; + } + switch (attr_id) { case UMAD_CM_ATTR_REQ: + if (unlikely(umad_len < sizeof(*hdr) + CM_REQ_DGID_POS + + sizeof(*gid_ifid))) { + rc = -EINVAL; + syslog(LOG_WARNING, + "Invalid MAD packet size (%d) 
for attr_id 0x%x\n", umad_len, + attr_id); + goto out; + } memcpy(gid_ifid, data + CM_REQ_DGID_POS, sizeof(*gid_ifid)); rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid); break; case UMAD_CM_ATTR_SIDR_REQ: + if (unlikely(umad_len < sizeof(*hdr) + CM_SIDR_REQ_DGID_POS + + sizeof(*gid_ifid))) { + rc = -EINVAL; + syslog(LOG_WARNING, + "Invalid MAD packet size (%d) for attr_id 0x%x\n", umad_len, + attr_id); + goto out; + } memcpy(gid_ifid, data + CM_SIDR_REQ_DGID_POS, sizeof(*gid_ifid)); rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid); break; @@ -331,6 +353,13 @@ static int get_fd(const char *mad, int *fd, __be64 *gid_ifid) data += sizeof(comm_id); /* Fall through */ case UMAD_CM_ATTR_SIDR_REP: + if (unlikely(umad_len < sizeof(*hdr) + sizeof(comm_id))) { + rc = -EINVAL; + syslog(LOG_WARNING, + "Invalid MAD packet size (%d) for attr_id 0x%x\n", umad_len, + attr_id); + goto out; + } memcpy(&comm_id, data, sizeof(comm_id)); if (comm_id) { rc = hash_tbl_search_fd_by_comm_id(comm_id, fd, gid_ifid); @@ -344,6 +373,7 @@ static int get_fd(const char *mad, int *fd, __be64 *gid_ifid) syslog(LOG_DEBUG, "mad_to_vm: %d 0x%x 0x%x\n", *fd, attr_id, comm_id); +out: return rc; } @@ -372,7 +402,8 @@ static void *umad_recv_thread_func(void *args) } while (rc && server.run); if (server.run) { - rc = get_fd(msg.umad.mad, &fd, &msg.hdr.sgid.global.interface_id); + rc = get_fd(msg.umad.mad, msg.umad_len, &fd, + &msg.hdr.sgid.global.interface_id); if (rc) { continue; } From 4d71b38ae8fa436f1707fe98b469a9cea4079503 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:05 -0700 Subject: [PATCH 02/18] hw/rdma: Switch to generic error reporting way Utilize error_report for all pr_err calls and for those pr_dbg calls that are considered errors. For the remaining pr_dbg calls, the important ones were replaced by trace points while the others were deleted. Some of the functions were renamed to include the prefix "rdma/pvrdma" in the function name. 
Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-2-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 356 ++++++++++++++-------------------- hw/rdma/rdma_backend.h | 4 +- hw/rdma/rdma_rm.c | 130 +++++-------- hw/rdma/rdma_rm.h | 10 +- hw/rdma/rdma_utils.c | 15 +- hw/rdma/rdma_utils.h | 45 +---- hw/rdma/trace-events | 32 ++- hw/rdma/vmw/pvrdma.h | 2 +- hw/rdma/vmw/pvrdma_cmd.c | 113 +++-------- hw/rdma/vmw/pvrdma_dev_ring.c | 26 +-- hw/rdma/vmw/pvrdma_main.c | 144 +++++--------- hw/rdma/vmw/pvrdma_qp_ops.c | 49 ++--- hw/rdma/vmw/trace-events | 16 +- 13 files changed, 364 insertions(+), 578 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index fd571f21e5..d138591c86 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -14,7 +14,6 @@ */ #include "qemu/osdep.h" -#include "qemu/error-report.h" #include "sysemu/sysemu.h" #include "qapi/error.h" #include "qapi/qmp/qlist.h" @@ -39,7 +38,6 @@ typedef struct BackendCtx { void *up_ctx; - bool is_tx_req; struct ibv_sge sge; /* Used to save MAD recv buffer */ } BackendCtx; @@ -52,7 +50,7 @@ static void (*comp_handler)(void *ctx, struct ibv_wc *wc); static void dummy_comp_handler(void *ctx, struct ibv_wc *wc) { - pr_err("No completion handler is registered\n"); + rdma_error_report("No completion handler is registered"); } static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err, @@ -66,29 +64,24 @@ static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err, comp_handler(ctx, &wc); } -static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) +static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) { int i, ne; BackendCtx *bctx; struct ibv_wc wc[2]; - pr_dbg("Entering poll_cq loop on cq %p\n", ibcq); do { ne = ibv_poll_cq(ibcq, ARRAY_SIZE(wc), wc); - pr_dbg("Got %d completion(s) from cq %p\n", 
ne, ibcq); + trace_rdma_poll_cq(ne, ibcq); for (i = 0; i < ne; i++) { - pr_dbg("wr_id=0x%" PRIx64 "\n", wc[i].wr_id); - pr_dbg("status=%d\n", wc[i].status); - bctx = rdma_rm_get_cqe_ctx(rdma_dev_res, wc[i].wr_id); if (unlikely(!bctx)) { - pr_dbg("Error: Failed to find ctx for req %" PRId64 "\n", - wc[i].wr_id); + rdma_error_report("No matching ctx for req %"PRId64, + wc[i].wr_id); continue; } - pr_dbg("Processing %s CQE\n", bctx->is_tx_req ? "send" : "recv"); comp_handler(bctx->up_ctx, &wc[i]); @@ -98,7 +91,7 @@ static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) } while (ne > 0); if (ne < 0) { - pr_dbg("Got error %d from ibv_poll_cq\n", ne); + rdma_error_report("ibv_poll_cq fail, rc=%d, errno=%d", ne, errno); } } @@ -115,12 +108,10 @@ static void *comp_handler_thread(void *arg) flags = fcntl(backend_dev->channel->fd, F_GETFL); rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK); if (rc < 0) { - pr_dbg("Fail to change to non-blocking mode\n"); + rdma_error_report("Failed to change backend channel FD to non-blocking"); return NULL; } - pr_dbg("Starting\n"); - pfds[0].fd = backend_dev->channel->fd; pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; @@ -132,27 +123,25 @@ static void *comp_handler_thread(void *arg) } while (!rc && backend_dev->comp_thread.run); if (backend_dev->comp_thread.run) { - pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel); rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx); - pr_dbg("ibv_get_cq_event=%d\n", rc); if (unlikely(rc)) { - pr_dbg("---> ibv_get_cq_event (%d)\n", rc); + rdma_error_report("ibv_get_cq_event fail, rc=%d, errno=%d", rc, + errno); continue; } rc = ibv_req_notify_cq(ev_cq, 0); if (unlikely(rc)) { - pr_dbg("Error %d from ibv_req_notify_cq\n", rc); + rdma_error_report("ibv_req_notify_cq fail, rc=%d, errno=%d", rc, + errno); } - poll_cq(backend_dev->rdma_dev_res, ev_cq); + rdma_poll_cq(backend_dev->rdma_dev_res, ev_cq); ibv_ack_cq_events(ev_cq, 1); } } - 
pr_dbg("Going down\n"); - /* TODO: Post cqe for all remaining buffs that were posted */ backend_dev->comp_thread.is_running = false; @@ -177,55 +166,54 @@ static inline int rdmacm_mux_can_process_async(RdmaBackendDev *backend_dev) return atomic_read(&backend_dev->rdmacm_mux.can_receive); } -static int check_mux_op_status(CharBackend *mad_chr_be) +static int rdmacm_mux_check_op_status(CharBackend *mad_chr_be) { RdmaCmMuxMsg msg = {}; int ret; - pr_dbg("Reading response\n"); ret = qemu_chr_fe_read_all(mad_chr_be, (uint8_t *)&msg, sizeof(msg)); if (ret != sizeof(msg)) { - pr_dbg("Invalid message size %d, expecting %ld\n", ret, sizeof(msg)); + rdma_error_report("Got invalid message from mux: size %d, expecting %d", + ret, (int)sizeof(msg)); return -EIO; } - pr_dbg("msg_type=%d\n", msg.hdr.msg_type); - pr_dbg("op_code=%d\n", msg.hdr.op_code); - pr_dbg("err_code=%d\n", msg.hdr.err_code); + trace_rdmacm_mux_check_op_status(msg.hdr.msg_type, msg.hdr.op_code, + msg.hdr.err_code); if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_RESP) { - pr_dbg("Invalid message type %d\n", msg.hdr.msg_type); + rdma_error_report("Got invalid message type %d", msg.hdr.msg_type); return -EIO; } if (msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) { - pr_dbg("Operation failed in mux, error code %d\n", msg.hdr.err_code); + rdma_error_report("Operation failed in mux, error code %d", + msg.hdr.err_code); return -EIO; } return 0; } -static int exec_rdmacm_mux_req(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg) +static int rdmacm_mux_send(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg) { int rc = 0; - pr_dbg("Executing request %d\n", msg->hdr.op_code); - msg->hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ; + trace_rdmacm_mux("send", msg->hdr.msg_type, msg->hdr.op_code); disable_rdmacm_mux_async(backend_dev); rc = qemu_chr_fe_write(backend_dev->rdmacm_mux.chr_be, (const uint8_t *)msg, sizeof(*msg)); if (rc != sizeof(*msg)) { enable_rdmacm_mux_async(backend_dev); - pr_dbg("Fail to send request to rdmacm_mux (rc=%d)\n", 
rc); + rdma_error_report("Failed to send request to rdmacm_mux (rc=%d)", rc); return -EIO; } - rc = check_mux_op_status(backend_dev->rdmacm_mux.chr_be); + rc = rdmacm_mux_check_op_status(backend_dev->rdmacm_mux.chr_be); if (rc) { - pr_dbg("Fail to execute rdmacm_mux request %d (rc=%d)\n", - msg->hdr.op_code, rc); + rdma_error_report("Failed to execute rdmacm_mux request %d (rc=%d)", + msg->hdr.op_code, rc); } enable_rdmacm_mux_async(backend_dev); @@ -237,7 +225,6 @@ static void stop_backend_thread(RdmaBackendThread *thread) { thread->run = false; while (thread->is_running) { - pr_dbg("Waiting for thread to complete\n"); sleep(THR_POLL_TO / SCALE_US / 2); } } @@ -273,7 +260,7 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev, rc = ibv_query_port(backend_dev->context, backend_dev->port_num, port_attr); if (rc) { - pr_dbg("Error %d from ibv_query_port\n", rc); + rdma_error_report("ibv_query_port fail, rc=%d, errno=%d", rc, errno); return -EIO; } @@ -282,7 +269,7 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev, void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq) { - poll_cq(rdma_dev_res, cq->ibcq); + rdma_poll_cq(rdma_dev_res, cq->ibcq); } static GHashTable *ah_hash; @@ -294,8 +281,8 @@ static struct ibv_ah *create_ah(RdmaBackendDev *backend_dev, struct ibv_pd *pd, struct ibv_ah *ah = g_hash_table_lookup(ah_hash, ah_key); if (ah) { - trace_create_ah_cache_hit(be64_to_cpu(dgid->global.subnet_prefix), - be64_to_cpu(dgid->global.interface_id)); + trace_rdma_create_ah_cache_hit(be64_to_cpu(dgid->global.subnet_prefix), + be64_to_cpu(dgid->global.interface_id)); g_bytes_unref(ah_key); } else { struct ibv_ah_attr ah_attr = { @@ -312,13 +299,13 @@ static struct ibv_ah *create_ah(RdmaBackendDev *backend_dev, struct ibv_pd *pd, g_hash_table_insert(ah_hash, ah_key, ah); } else { g_bytes_unref(ah_key); - pr_dbg("Fail to create AH for gid <0x%" PRIx64 ", 0x%" PRIx64 ">\n", - be64_to_cpu(dgid->global.subnet_prefix), - 
be64_to_cpu(dgid->global.interface_id)); + rdma_error_report("Failed to create AH for gid <0x%" PRIx64", 0x%"PRIx64">", + be64_to_cpu(dgid->global.subnet_prefix), + be64_to_cpu(dgid->global.interface_id)); } - trace_create_ah_cache_miss(be64_to_cpu(dgid->global.subnet_prefix), - be64_to_cpu(dgid->global.interface_id)); + trace_rdma_create_ah_cache_miss(be64_to_cpu(dgid->global.subnet_prefix), + be64_to_cpu(dgid->global.interface_id)); } return ah; @@ -349,12 +336,10 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, RdmaRmMR *mr; int ssge_idx; - pr_dbg("num_sge=%d\n", num_sge); - for (ssge_idx = 0; ssge_idx < num_sge; ssge_idx++) { mr = rdma_rm_get_mr(rdma_dev_res, ssge[ssge_idx].lkey); if (unlikely(!mr)) { - pr_dbg("Invalid lkey 0x%x\n", ssge[ssge_idx].lkey); + rdma_error_report("Invalid lkey 0x%x", ssge[ssge_idx].lkey); return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey; } @@ -362,17 +347,28 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, dsge->length = ssge[ssge_idx].length; dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr); - pr_dbg("ssge->addr=0x%" PRIx64 "\n", ssge[ssge_idx].addr); - pr_dbg("dsge->addr=0x%" PRIx64 "\n", dsge->addr); - pr_dbg("dsge->length=%d\n", dsge->length); - pr_dbg("dsge->lkey=0x%x\n", dsge->lkey); - dsge++; } return 0; } +static void trace_mad_message(const char *title, char *buf, int len) +{ + int i; + char *b = g_malloc0(len * 3 + 1); + char b1[4]; + + for (i = 0; i < len; i++) { + sprintf(b1, "%.2X ", buf[i] & 0x000000FF); + strcat(b, b1); + } + + trace_rdma_mad_message(title, len, b); + + g_free(b); +} + static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx, union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge) { @@ -380,8 +376,6 @@ static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx, char *hdr, *data; int ret; - pr_dbg("num_sge=%d\n", num_sge); - if (num_sge != 2) { return -EINVAL; } @@ -390,7 +384,6 @@ static int mad_send(RdmaBackendDev *backend_dev, uint8_t 
sgid_idx, memcpy(msg.hdr.sgid.raw, sgid->raw, sizeof(msg.hdr.sgid)); msg.umad_len = sge[0].length + sge[1].length; - pr_dbg("umad_len=%d\n", msg.umad_len); if (msg.umad_len > sizeof(msg.umad.mad)) { return -ENOMEM; @@ -398,36 +391,31 @@ static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx, msg.umad.hdr.addr.qpn = htobe32(1); msg.umad.hdr.addr.grh_present = 1; - pr_dbg("sgid_idx=%d\n", sgid_idx); - pr_dbg("sgid=0x%llx\n", sgid->global.interface_id); msg.umad.hdr.addr.gid_index = sgid_idx; memcpy(msg.umad.hdr.addr.gid, sgid->raw, sizeof(msg.umad.hdr.addr.gid)); msg.umad.hdr.addr.hop_limit = 0xFF; hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length); if (!hdr) { - pr_dbg("Fail to map to sge[0]\n"); return -ENOMEM; } data = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length); if (!data) { - pr_dbg("Fail to map to sge[1]\n"); rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length); return -ENOMEM; } - pr_dbg_buf("mad_hdr", hdr, sge[0].length); - pr_dbg_buf("mad_data", data, sge[1].length); - memcpy(&msg.umad.mad[0], hdr, sge[0].length); memcpy(&msg.umad.mad[sge[0].length], data, sge[1].length); rdma_pci_dma_unmap(backend_dev->dev, data, sge[1].length); rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length); - ret = exec_rdmacm_mux_req(backend_dev, &msg); + trace_mad_message("send", msg.umad.mad, msg.umad_len); + + ret = rdmacm_mux_send(backend_dev, &msg); if (ret) { - pr_dbg("Fail to send MAD to rdma_umadmux (%d)\n", ret); + rdma_error_report("Failed to send MAD to rdma_umadmux (%d)", ret); return -EIO; } @@ -447,12 +435,11 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, int rc; struct ibv_send_wr wr = {0}, *bad_wr; - if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */ + if (!qp->ibqp) { /* This field is not initialized for QP0 and QP1 */ if (qp_type == IBV_QPT_SMI) { - pr_dbg("QP0 unsupported\n"); + rdma_error_report("Got QP0 request"); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx); 
} else if (qp_type == IBV_QPT_GSI) { - pr_dbg("QP1\n"); rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge); if (rc) { complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx); @@ -463,22 +450,17 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, return; } - pr_dbg("num_sge=%d\n", num_sge); - bctx = g_malloc0(sizeof(*bctx)); bctx->up_ctx = ctx; - bctx->is_tx_req = 1; rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { - pr_dbg("Failed to allocate cqe_ctx\n"); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); goto out_free_bctx; } rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge); if (rc) { - pr_dbg("Error: Failed to build host SGE array\n"); complete_work(IBV_WC_GENERAL_ERR, rc, ctx); goto out_dealloc_cqe_ctx; } @@ -500,10 +482,9 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, wr.wr_id = bctx_id; rc = ibv_post_send(qp->ibqp, &wr, &bad_wr); - pr_dbg("ibv_post_send=%d\n", rc); if (rc) { - pr_dbg("Fail (%d, %d) to post send WQE to qpn %d\n", rc, errno, - qp->ibqp->qp_num); + rdma_error_report("ibv_post_send fail, qpn=0x%x, rc=%d, errno=%d", + qp->ibqp->qp_num, rc, errno); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); goto out_dealloc_cqe_ctx; } @@ -526,29 +507,23 @@ static unsigned int save_mad_recv_buffer(RdmaBackendDev *backend_dev, uint32_t bctx_id; if (num_sge != 1) { - pr_dbg("Invalid num_sge (%d), expecting 1\n", num_sge); + rdma_error_report("Invalid num_sge (%d), expecting 1", num_sge); return VENDOR_ERR_INV_NUM_SGE; } if (sge[0].length < RDMA_MAX_PRIVATE_DATA + sizeof(struct ibv_grh)) { - pr_dbg("Too small buffer for MAD\n"); + rdma_error_report("Too small buffer for MAD"); return VENDOR_ERR_INV_MAD_BUFF; } - pr_dbg("addr=0x%" PRIx64"\n", sge[0].addr); - pr_dbg("length=%d\n", sge[0].length); - pr_dbg("lkey=%d\n", sge[0].lkey); - bctx = g_malloc0(sizeof(*bctx)); rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); if 
(unlikely(rc)) { g_free(bctx); - pr_dbg("Fail to allocate cqe_ctx\n"); return VENDOR_ERR_NOMEM; } - pr_dbg("bctx_id %d, bctx %p, ctx %p\n", bctx_id, bctx, ctx); bctx->up_ctx = ctx; bctx->sge = *sge; @@ -572,11 +547,10 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */ if (qp_type == IBV_QPT_SMI) { - pr_dbg("QP0 unsupported\n"); + rdma_error_report("Got QP0 request"); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx); } if (qp_type == IBV_QPT_GSI) { - pr_dbg("QP1\n"); rc = save_mad_recv_buffer(backend_dev, sge, num_sge, ctx); if (rc) { complete_work(IBV_WC_GENERAL_ERR, rc, ctx); @@ -585,22 +559,17 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, return; } - pr_dbg("num_sge=%d\n", num_sge); - bctx = g_malloc0(sizeof(*bctx)); bctx->up_ctx = ctx; - bctx->is_tx_req = 0; rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { - pr_dbg("Failed to allocate cqe_ctx\n"); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); goto out_free_bctx; } rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge); if (rc) { - pr_dbg("Error: Failed to build host SGE array\n"); complete_work(IBV_WC_GENERAL_ERR, rc, ctx); goto out_dealloc_cqe_ctx; } @@ -609,10 +578,9 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, wr.sg_list = new_sge; wr.wr_id = bctx_id; rc = ibv_post_recv(qp->ibqp, &wr, &bad_wr); - pr_dbg("ibv_post_recv=%d\n", rc); if (rc) { - pr_dbg("Fail (%d, %d) to post recv WQE to qpn %d\n", rc, errno, - qp->ibqp->qp_num); + rdma_error_report("ibv_post_recv fail, qpn=0x%x, rc=%d, errno=%d", + qp->ibqp->qp_num, rc, errno); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); goto out_dealloc_cqe_ctx; } @@ -630,7 +598,12 @@ int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd) { pd->ibpd = ibv_alloc_pd(backend_dev->context); - return pd->ibpd ? 
0 : -EIO; + if (!pd->ibpd) { + rdma_error_report("ibv_alloc_pd fail, errno=%d", errno); + return -EIO; + } + + return 0; } void rdma_backend_destroy_pd(RdmaBackendPD *pd) @@ -643,16 +616,15 @@ void rdma_backend_destroy_pd(RdmaBackendPD *pd) int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr, size_t length, int access) { - pr_dbg("addr=0x%p\n", addr); - pr_dbg("len=%zu\n", length); mr->ibmr = ibv_reg_mr(pd->ibpd, addr, length, access); - if (mr->ibmr) { - pr_dbg("lkey=0x%x\n", mr->ibmr->lkey); - pr_dbg("rkey=0x%x\n", mr->ibmr->rkey); - mr->ibpd = pd->ibpd; + if (!mr->ibmr) { + rdma_error_report("ibv_reg_mr fail, errno=%d", errno); + return -EIO; } - return mr->ibmr ? 0 : -EIO; + mr->ibpd = pd->ibpd; + + return 0; } void rdma_backend_destroy_mr(RdmaBackendMR *mr) @@ -667,21 +639,21 @@ int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq, { int rc; - pr_dbg("cqe=%d\n", cqe); - - pr_dbg("dev->channel=%p\n", backend_dev->channel); cq->ibcq = ibv_create_cq(backend_dev->context, cqe + 1, NULL, backend_dev->channel, 0); - - if (cq->ibcq) { - rc = ibv_req_notify_cq(cq->ibcq, 0); - if (rc) { - pr_dbg("Error %d from ibv_req_notify_cq\n", rc); - } - cq->backend_dev = backend_dev; + if (!cq->ibcq) { + rdma_error_report("ibv_create_cq fail, errno=%d", errno); + return -EIO; } - return cq->ibcq ? 
0 : -EIO; + rc = ibv_req_notify_cq(cq->ibcq, 0); + if (rc) { + rdma_warn_report("ibv_req_notify_cq fail, rc=%d, errno=%d", rc, errno); + } + + cq->backend_dev = backend_dev; + + return 0; } void rdma_backend_destroy_cq(RdmaBackendCQ *cq) @@ -700,7 +672,6 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, struct ibv_qp_init_attr attr = {0}; qp->ibqp = 0; - pr_dbg("qp_type=%d\n", qp_type); switch (qp_type) { case IBV_QPT_GSI: @@ -713,7 +684,7 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, break; default: - pr_dbg("Unsupported QP type %d\n", qp_type); + rdma_error_report("Unsupported QP type %d", qp_type); return -EIO; } @@ -725,14 +696,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, attr.cap.max_send_sge = max_send_sge; attr.cap.max_recv_sge = max_recv_sge; - pr_dbg("max_send_wr=%d\n", max_send_wr); - pr_dbg("max_recv_wr=%d\n", max_recv_wr); - pr_dbg("max_send_sge=%d\n", max_send_sge); - pr_dbg("max_recv_sge=%d\n", max_recv_sge); - qp->ibqp = ibv_create_qp(pd->ibpd, &attr); - if (likely(!qp->ibqp)) { - pr_dbg("Error from ibv_create_qp\n"); + if (!qp->ibqp) { + rdma_error_report("ibv_create_qp fail, errno=%d", errno); return -EIO; } @@ -740,8 +706,6 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, /* TODO: Query QP to get max_inline_data and save it to be used in send */ - pr_dbg("qpn=0x%x\n", qp->ibqp->qp_num); - return 0; } @@ -751,9 +715,6 @@ int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, struct ibv_qp_attr attr = {0}; int rc, attr_mask; - pr_dbg("qpn=0x%x\n", qp->ibqp->qp_num); - pr_dbg("sport_num=%d\n", backend_dev->port_num); - attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT; attr.qp_state = IBV_QPS_INIT; attr.pkey_index = 0; @@ -762,21 +723,23 @@ int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, switch (qp_type) { case IBV_QPT_RC: attr_mask |= IBV_QP_ACCESS_FLAGS; + trace_rdma_backend_rc_qp_state_init(qp->ibqp->qp_num); 
break; case IBV_QPT_UD: attr.qkey = qkey; attr_mask |= IBV_QP_QKEY; + trace_rdma_backend_ud_qp_state_init(qp->ibqp->qp_num, qkey); break; default: - pr_dbg("Unsupported QP type %d\n", qp_type); + rdma_error_report("Unsupported QP type %d", qp_type); return -EIO; } rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask); if (rc) { - pr_dbg("Error %d from ibv_modify_qp\n", rc); + rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno); return -EIO; } @@ -802,14 +765,6 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, switch (qp_type) { case IBV_QPT_RC: - pr_dbg("dgid=0x%" PRIx64 ",%" PRIx64 "\n", - be64_to_cpu(ibv_gid.global.subnet_prefix), - be64_to_cpu(ibv_gid.global.interface_id)); - pr_dbg("dqpn=0x%x\n", dqpn); - pr_dbg("sgid_idx=%d\n", qp->sgid_idx); - pr_dbg("sport_num=%d\n", backend_dev->port_num); - pr_dbg("rq_psn=0x%x\n", rq_psn); - attr.path_mtu = IBV_MTU_1024; attr.dest_qp_num = dqpn; attr.max_dest_rd_atomic = 1; @@ -824,20 +779,28 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; + + trace_rdma_backend_rc_qp_state_rtr(qp->ibqp->qp_num, + be64_to_cpu(ibv_gid.global. + subnet_prefix), + be64_to_cpu(ibv_gid.global. + interface_id), + qp->sgid_idx, dqpn, rq_psn); break; case IBV_QPT_UD: - pr_dbg("qkey=0x%x\n", qkey); if (use_qkey) { attr.qkey = qkey; attr_mask |= IBV_QP_QKEY; } + trace_rdma_backend_ud_qp_state_rtr(qp->ibqp->qp_num, use_qkey ? 
qkey : + 0); break; } rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask); if (rc) { - pr_dbg("Error %d from ibv_modify_qp\n", rc); + rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno); return -EIO; } @@ -850,9 +813,6 @@ int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type, struct ibv_qp_attr attr = {0}; int rc, attr_mask; - pr_dbg("qpn=0x%x\n", qp->ibqp->qp_num); - pr_dbg("sq_psn=0x%x\n", sq_psn); - attr.qp_state = IBV_QPS_RTS; attr.sq_psn = sq_psn; attr_mask = IBV_QP_STATE | IBV_QP_SQ_PSN; @@ -866,20 +826,22 @@ int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type, attr_mask |= IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; + trace_rdma_backend_rc_qp_state_rts(qp->ibqp->qp_num, sq_psn); break; case IBV_QPT_UD: if (use_qkey) { - pr_dbg("qkey=0x%x\n", qkey); attr.qkey = qkey; attr_mask |= IBV_QP_QKEY; } + trace_rdma_backend_ud_qp_state_rts(qp->ibqp->qp_num, sq_psn, + use_qkey ? qkey : 0); break; } rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask); if (rc) { - pr_dbg("Error %d from ibv_modify_qp\n", rc); + rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno); return -EIO; } @@ -890,7 +852,6 @@ int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr) { if (!qp->ibqp) { - pr_dbg("QP1\n"); attr->qp_state = IBV_QPS_RTS; return 0; } @@ -906,20 +867,23 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp) } #define CHK_ATTR(req, dev, member, fmt) ({ \ - pr_dbg("%s="fmt","fmt"\n", #member, dev.member, req->member); \ + trace_rdma_check_dev_attr(#member, dev.member, req->member); \ if (req->member > dev.member) { \ - warn_report("%s = "fmt" is higher than host device capability "fmt, \ - #member, req->member, dev.member); \ + rdma_warn_report("%s = "fmt" is higher than host device capability "fmt, \ + #member, req->member, dev.member); \ req->member = dev.member; \ } \ - pr_dbg("%s="fmt"\n", #member, req->member); }) +}) static int 
init_device_caps(RdmaBackendDev *backend_dev, struct ibv_device_attr *dev_attr) { struct ibv_device_attr bk_dev_attr; + int rc; - if (ibv_query_device(backend_dev->context, &bk_dev_attr)) { + rc = ibv_query_device(backend_dev->context, &bk_dev_attr); + if (rc) { + rdma_error_report("ibv_query_device fail, rc=%d, errno=%d", rc, errno); return -EIO; } @@ -928,9 +892,7 @@ static int init_device_caps(RdmaBackendDev *backend_dev, CHK_ATTR(dev_attr, bk_dev_attr, max_mr_size, "%" PRId64); CHK_ATTR(dev_attr, bk_dev_attr, max_qp, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_sge, "%d"); - CHK_ATTR(dev_attr, bk_dev_attr, max_qp_wr, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_cq, "%d"); - CHK_ATTR(dev_attr, bk_dev_attr, max_cqe, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_mr, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_pd, "%d"); CHK_ATTR(dev_attr, bk_dev_attr, max_qp_rd_atom, "%d"); @@ -946,10 +908,6 @@ static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid, grh->paylen = htons(paylen); grh->sgid = *sgid; grh->dgid = *my_gid; - - pr_dbg("paylen=%d (net=0x%x)\n", paylen, grh->paylen); - pr_dbg("dgid=0x%llx\n", my_gid->global.interface_id); - pr_dbg("sgid=0x%llx\n", sgid->global.interface_id); } static void process_incoming_mad_req(RdmaBackendDev *backend_dev, @@ -960,21 +918,13 @@ static void process_incoming_mad_req(RdmaBackendDev *backend_dev, BackendCtx *bctx; char *mad; - pr_dbg("umad_len=%d\n", msg->umad_len); - -#ifdef PVRDMA_DEBUG - struct umad_hdr *hdr = (struct umad_hdr *)&msg->umad.mad; - pr_dbg("bv %x cls %x cv %x mtd %x st %d tid %" PRIx64 " at %x atm %x\n", - hdr->base_version, hdr->mgmt_class, hdr->class_version, - hdr->method, hdr->status, be64toh(hdr->tid), - hdr->attr_id, hdr->attr_mod); -#endif + trace_mad_message("recv", msg->umad.mad, msg->umad_len); qemu_mutex_lock(&backend_dev->recv_mads_list.lock); o_ctx_id = qlist_pop(backend_dev->recv_mads_list.list); qemu_mutex_unlock(&backend_dev->recv_mads_list.lock); if (!o_ctx_id) { - pr_dbg("No more 
free MADs buffers, waiting for a while\n"); + rdma_warn_report("No more free MADs buffers, waiting for a while"); sleep(THR_POLL_TO); return; } @@ -982,12 +932,10 @@ static void process_incoming_mad_req(RdmaBackendDev *backend_dev, cqe_ctx_id = qnum_get_uint(qobject_to(QNum, o_ctx_id)); bctx = rdma_rm_get_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id); if (unlikely(!bctx)) { - pr_dbg("Error: Fail to find ctx for %ld\n", cqe_ctx_id); + rdma_error_report("No matching ctx for req %ld", cqe_ctx_id); return; } - pr_dbg("id %ld, bctx %p, ctx %p\n", cqe_ctx_id, bctx, bctx->up_ctx); - mad = rdma_pci_dma_map(backend_dev->dev, bctx->sge.addr, bctx->sge.length); if (!mad || bctx->sge.length < msg->umad_len + MAD_HDR_SIZE) { @@ -995,7 +943,6 @@ static void process_incoming_mad_req(RdmaBackendDev *backend_dev, bctx->up_ctx); } else { struct ibv_wc wc = {0}; - pr_dbg_buf("mad", msg->umad.mad, msg->umad_len); memset(mad, 0, bctx->sge.length); build_mad_hdr((struct ibv_grh *)mad, (union ibv_gid *)&msg->umad.hdr.addr.gid, &msg->hdr.sgid, @@ -1025,13 +972,11 @@ static void rdmacm_mux_read(void *opaque, const uint8_t *buf, int size) RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque; RdmaCmMuxMsg *msg = (RdmaCmMuxMsg *)buf; - pr_dbg("Got %d bytes\n", size); - pr_dbg("msg_type=%d\n", msg->hdr.msg_type); - pr_dbg("op_code=%d\n", msg->hdr.op_code); + trace_rdmacm_mux("read", msg->hdr.msg_type, msg->hdr.op_code); if (msg->hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ && msg->hdr.op_code != RDMACM_MUX_OP_CODE_MAD) { - pr_dbg("Error: Not a MAD request, skipping\n"); + rdma_error_report("Error: Not a MAD request, skipping"); return; } process_incoming_mad_req(backend_dev, msg); @@ -1045,7 +990,7 @@ static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be) ret = qemu_chr_fe_backend_connected(backend_dev->rdmacm_mux.chr_be); if (!ret) { - pr_dbg("Missing chardev for MAD multiplexer\n"); + rdma_error_report("Missing chardev for MAD multiplexer"); return -EIO; } @@ -1063,7 +1008,6 
@@ static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be) static void mad_fini(RdmaBackendDev *backend_dev) { - pr_dbg("Stopping MAD\n"); disable_rdmacm_mux_async(backend_dev); qemu_chr_fe_disconnect(backend_dev->rdmacm_mux.chr_be); if (backend_dev->recv_mads_list.list) { @@ -1079,17 +1023,15 @@ int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, int ret; int i = 0; - pr_dbg("0x%llx, 0x%llx\n", - (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), - (long long unsigned int)be64_to_cpu(gid->global.interface_id)); - do { ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, i, &sgid); i++; } while (!ret && (memcmp(&sgid, gid, sizeof(*gid)))); - pr_dbg("gid_index=%d\n", i - 1); + trace_rdma_backend_get_gid_index(be64_to_cpu(gid->global.subnet_prefix), + be64_to_cpu(gid->global.interface_id), + i - 1); return ret ? ret : i - 1; } @@ -1100,16 +1042,15 @@ int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, RdmaCmMuxMsg msg = {}; int ret; - pr_dbg("0x%llx, 0x%llx\n", - (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), - (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + trace_rdma_backend_gid_change("add", be64_to_cpu(gid->global.subnet_prefix), + be64_to_cpu(gid->global.interface_id)); msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG; memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid)); - ret = exec_rdmacm_mux_req(backend_dev, &msg); + ret = rdmacm_mux_send(backend_dev, &msg); if (ret) { - pr_dbg("Fail to register GID to rdma_umadmux (%d)\n", ret); + rdma_error_report("Failed to register GID to rdma_umadmux (%d)", ret); return -EIO; } @@ -1126,16 +1067,16 @@ int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname, RdmaCmMuxMsg msg = {}; int ret; - pr_dbg("0x%llx, 0x%llx\n", - (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), - (long long unsigned int)be64_to_cpu(gid->global.interface_id)); + trace_rdma_backend_gid_change("del", 
be64_to_cpu(gid->global.subnet_prefix), + be64_to_cpu(gid->global.interface_id)); msg.hdr.op_code = RDMACM_MUX_OP_CODE_UNREG; memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid)); - ret = exec_rdmacm_mux_req(backend_dev, &msg); + ret = rdmacm_mux_send(backend_dev, &msg); if (ret) { - pr_dbg("Fail to unregister GID from rdma_umadmux (%d)\n", ret); + rdma_error_report("Failed to unregister GID from rdma_umadmux (%d)", + ret); return -EIO; } @@ -1149,8 +1090,7 @@ int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname, int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, - struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be, - Error **errp) + struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be) { int i; int ret = 0; @@ -1167,12 +1107,12 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, dev_list = ibv_get_device_list(&num_ibv_devices); if (!dev_list) { - error_setg(errp, "Failed to get IB devices list"); + rdma_error_report("Failed to get IB devices list"); return -EIO; } if (num_ibv_devices == 0) { - error_setg(errp, "No IB devices were found"); + rdma_error_report("No IB devices were found"); ret = -ENXIO; goto out_free_dev_list; } @@ -1187,8 +1127,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, backend_dev->ib_dev = dev_list[i]; if (!backend_dev->ib_dev) { - error_setg(errp, "Failed to find IB device %s", - backend_device_name); + rdma_error_report("Failed to find IB device %s", + backend_device_name); ret = -EIO; goto out_free_dev_list; } @@ -1196,28 +1136,26 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, backend_dev->ib_dev = *dev_list; } - pr_dbg("Using backend device %s, port %d\n", - ibv_get_device_name(backend_dev->ib_dev), backend_dev->port_num); - pr_dbg("uverb device %s\n", backend_dev->ib_dev->dev_name); + rdma_info_report("uverb device %s", 
backend_dev->ib_dev->dev_name); backend_dev->context = ibv_open_device(backend_dev->ib_dev); if (!backend_dev->context) { - error_setg(errp, "Failed to open IB device"); + rdma_error_report("Failed to open IB device %s", + ibv_get_device_name(backend_dev->ib_dev)); ret = -EIO; goto out; } backend_dev->channel = ibv_create_comp_channel(backend_dev->context); if (!backend_dev->channel) { - error_setg(errp, "Failed to create IB communication channel"); + rdma_error_report("Failed to create IB communication channel"); ret = -EIO; goto out_close_device; } - pr_dbg("dev->backend_dev.channel=%p\n", backend_dev->channel); ret = init_device_caps(backend_dev, dev_attr); if (ret) { - error_setg(errp, "Failed to initialize device capabilities"); + rdma_error_report("Failed to initialize device capabilities"); ret = -EIO; goto out_destroy_comm_channel; } @@ -1225,7 +1163,7 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, ret = mad_init(backend_dev, mad_chr_be); if (ret) { - error_setg(errp, "Fail to initialize mad"); + rdma_error_report("Failed to initialize mad"); ret = -EIO; goto out_destroy_comm_channel; } @@ -1253,13 +1191,11 @@ out: void rdma_backend_start(RdmaBackendDev *backend_dev) { - pr_dbg("Starting rdma_backend\n"); start_comp_thread(backend_dev); } void rdma_backend_stop(RdmaBackendDev *backend_dev) { - pr_dbg("Stopping rdma_backend\n"); stop_backend_thread(&backend_dev->comp_thread); } diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 5114c90e67..8e53a72bf2 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -58,8 +58,8 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr) int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev, RdmaDeviceResources *rdma_dev_res, const char *backend_device_name, uint8_t port_num, - struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be, - Error **errp); + struct ibv_device_attr *dev_attr, + CharBackend *mad_chr_be); void 
rdma_backend_fini(RdmaBackendDev *backend_dev); int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname, union ibv_gid *gid); diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 268ff633a4..66177b42f5 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -17,6 +17,7 @@ #include "qapi/error.h" #include "cpu.h" +#include "trace.h" #include "rdma_utils.h" #include "rdma_backend.h" #include "rdma_rm.h" @@ -49,25 +50,26 @@ static inline void res_tbl_free(RdmaRmResTbl *tbl) g_free(tbl->bitmap); } -static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle) +static inline void *rdma_res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle) { - pr_dbg("%s, handle=%d\n", tbl->name, handle); + trace_rdma_res_tbl_get(tbl->name, handle); if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) { return tbl->tbl + handle * tbl->res_sz; } else { - pr_dbg("Invalid handle %d\n", handle); + rdma_error_report("Table %s, invalid handle %d", tbl->name, handle); return NULL; } } -static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle) +static inline void *rdma_res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle) { qemu_mutex_lock(&tbl->lock); *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz); if (*handle > tbl->tbl_sz) { - pr_dbg("Failed to alloc, bitmap is full\n"); + rdma_error_report("Table %s, failed to allocate, bitmap is full", + tbl->name); qemu_mutex_unlock(&tbl->lock); return NULL; } @@ -78,14 +80,14 @@ static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle) memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz); - pr_dbg("%s, handle=%d\n", tbl->name, *handle); + trace_rdma_res_tbl_alloc(tbl->name, *handle); return tbl->tbl + *handle * tbl->res_sz; } -static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle) +static inline void rdma_res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle) { - pr_dbg("%s, handle=%d\n", tbl->name, handle); + trace_rdma_res_tbl_dealloc(tbl->name, handle); 
qemu_mutex_lock(&tbl->lock); @@ -102,7 +104,7 @@ int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, RdmaRmPD *pd; int ret = -ENOMEM; - pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle); + pd = rdma_res_tbl_alloc(&dev_res->pd_tbl, pd_handle); if (!pd) { goto out; } @@ -118,7 +120,7 @@ int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, return 0; out_tbl_dealloc: - res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle); + rdma_res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle); out: return ret; @@ -126,7 +128,7 @@ out: RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle) { - return res_tbl_get(&dev_res->pd_tbl, pd_handle); + return rdma_res_tbl_get(&dev_res->pd_tbl, pd_handle); } void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle) @@ -135,14 +137,14 @@ void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle) if (pd) { rdma_backend_destroy_pd(&pd->backend_pd); - res_tbl_dealloc(&dev_res->pd_tbl, pd_handle); + rdma_res_tbl_dealloc(&dev_res->pd_tbl, pd_handle); } } int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, - uint64_t guest_start, size_t guest_length, void *host_virt, - int access_flags, uint32_t *mr_handle, uint32_t *lkey, - uint32_t *rkey) + uint64_t guest_start, uint64_t guest_length, + void *host_virt, int access_flags, uint32_t *mr_handle, + uint32_t *lkey, uint32_t *rkey) { RdmaRmMR *mr; int ret = 0; @@ -150,20 +152,15 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, pd = rdma_rm_get_pd(dev_res, pd_handle); if (!pd) { - pr_dbg("Invalid PD\n"); return -EINVAL; } - mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle); + mr = rdma_res_tbl_alloc(&dev_res->mr_tbl, mr_handle); if (!mr) { - pr_dbg("Failed to allocate obj in table\n"); return -ENOMEM; } - pr_dbg("mr_handle=%d\n", *mr_handle); - - pr_dbg("host_virt=0x%p\n", host_virt); - pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start); - pr_dbg("length=%zu\n", 
guest_length); + trace_rdma_rm_alloc_mr(*mr_handle, host_virt, guest_start, guest_length, + access_flags); if (host_virt) { mr->virt = host_virt; @@ -174,7 +171,6 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt, mr->length, access_flags); if (ret) { - pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret); ret = -EIO; goto out_dealloc_mr; } @@ -189,14 +185,14 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, return 0; out_dealloc_mr: - res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle); + rdma_res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle); return ret; } RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) { - return res_tbl_get(&dev_res->mr_tbl, mr_handle); + return rdma_res_tbl_get(&dev_res->mr_tbl, mr_handle); } void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) @@ -205,12 +201,12 @@ void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) if (mr) { rdma_backend_destroy_mr(&mr->backend_mr); - pr_dbg("start=0x%" PRIx64 "\n", mr->start); + trace_rdma_rm_dealloc_mr(mr_handle, mr->start); if (mr->start) { mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1)); munmap(mr->virt, mr->length); } - res_tbl_dealloc(&dev_res->mr_tbl, mr_handle); + rdma_res_tbl_dealloc(&dev_res->mr_tbl, mr_handle); } } @@ -222,12 +218,13 @@ int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn, /* TODO: Need to make sure pfn is between bar start address and * bsd+RDMA_BAR2_UAR_SIZE if (pfn > RDMA_BAR2_UAR_SIZE) { - pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE); + rdma_error_report("pfn out of range (%d > %d)", pfn, + RDMA_BAR2_UAR_SIZE); return -ENOMEM; } */ - uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle); + uc = rdma_res_tbl_alloc(&dev_res->uc_tbl, uc_handle); if (!uc) { return -ENOMEM; } @@ -237,7 +234,7 @@ int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn, RdmaRmUC 
*rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle) { - return res_tbl_get(&dev_res->uc_tbl, uc_handle); + return rdma_res_tbl_get(&dev_res->uc_tbl, uc_handle); } void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle) @@ -245,13 +242,13 @@ void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle) RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle); if (uc) { - res_tbl_dealloc(&dev_res->uc_tbl, uc_handle); + rdma_res_tbl_dealloc(&dev_res->uc_tbl, uc_handle); } } RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle) { - return res_tbl_get(&dev_res->cq_tbl, cq_handle); + return rdma_res_tbl_get(&dev_res->cq_tbl, cq_handle); } int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, @@ -260,7 +257,7 @@ int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int rc; RdmaRmCQ *cq; - cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle); + cq = rdma_res_tbl_alloc(&dev_res->cq_tbl, cq_handle); if (!cq) { return -ENOMEM; } @@ -287,8 +284,6 @@ void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle, { RdmaRmCQ *cq; - pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify); - cq = rdma_rm_get_cq(dev_res, cq_handle); if (!cq) { return; @@ -297,8 +292,6 @@ void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle, if (cq->notify != CNT_SET) { cq->notify = notify ? 
CNT_ARM : CNT_CLEAR; } - - pr_dbg("notify=%d\n", cq->notify); } void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle) @@ -312,7 +305,7 @@ void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle) rdma_backend_destroy_cq(&cq->backend_cq); - res_tbl_dealloc(&dev_res->cq_tbl, cq_handle); + rdma_res_tbl_dealloc(&dev_res->cq_tbl, cq_handle); } RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn) @@ -323,6 +316,10 @@ RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn) g_bytes_unref(key); + if (!qp) { + rdma_error_report("Invalid QP handle %d", qpn); + } + return qp; } @@ -338,11 +335,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, RdmaRmPD *pd; uint32_t rm_qpn; - pr_dbg("qp_type=%d\n", qp_type); - pd = rdma_rm_get_pd(dev_res, pd_handle); if (!pd) { - pr_err("Invalid pd handle (%d)\n", pd_handle); return -EINVAL; } @@ -350,8 +344,8 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, rcq = rdma_rm_get_cq(dev_res, recv_cq_handle); if (!scq || !rcq) { - pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n", - send_cq_handle, recv_cq_handle); + rdma_error_report("Invalid send_cqn or recv_cqn (%d, %d)", + send_cq_handle, recv_cq_handle); return -EINVAL; } @@ -360,11 +354,10 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, rcq->notify = CNT_SET; } - qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn); + qp = rdma_res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn); if (!qp) { return -ENOMEM; } - pr_dbg("rm_qpn=%d\n", rm_qpn); qp->qpn = rm_qpn; qp->qp_state = IBV_QPS_RESET; @@ -382,13 +375,13 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, } *qpn = rdma_backend_qpn(&qp->backend_qp); - pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn); + trace_rdma_rm_alloc_qp(rm_qpn, *qpn, qp_type); g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp); return 0; out_dealloc_qp: - res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); + 
rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); return rc; } @@ -402,28 +395,22 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, RdmaRmQP *qp; int ret; - pr_dbg("qpn=0x%x\n", qp_handle); - pr_dbg("qkey=0x%x\n", qkey); - qp = rdma_rm_get_qp(dev_res, qp_handle); if (!qp) { return -EINVAL; } - pr_dbg("qp_type=%d\n", qp->qp_type); - pr_dbg("attr_mask=0x%x\n", attr_mask); - if (qp->qp_type == IBV_QPT_SMI) { - pr_dbg("QP0 unsupported\n"); + rdma_error_report("Got QP0 request"); return -EPERM; } else if (qp->qp_type == IBV_QPT_GSI) { - pr_dbg("QP1\n"); return 0; } + trace_rdma_rm_modify_qp(qp_handle, attr_mask, qp_state, sgid_idx); + if (attr_mask & IBV_QP_STATE) { qp->qp_state = qp_state; - pr_dbg("qp_state=%d\n", qp->qp_state); if (qp->qp_state == IBV_QPS_INIT) { ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp, @@ -435,11 +422,11 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, if (qp->qp_state == IBV_QPS_RTR) { /* Get backend gid index */ - pr_dbg("Guest sgid_idx=%d\n", sgid_idx); sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev, sgid_idx); if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */ - pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx); + rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d", + sgid_idx); return -EIO; } @@ -471,15 +458,11 @@ int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, { RdmaRmQP *qp; - pr_dbg("qpn=0x%x\n", qp_handle); - qp = rdma_rm_get_qp(dev_res, qp_handle); if (!qp) { return -EINVAL; } - pr_dbg("qp_type=%d\n", qp->qp_type); - return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr); } @@ -499,20 +482,18 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle) rdma_backend_destroy_qp(&qp->backend_qp); - res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); + rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); } void *rdma_rm_get_cqe_ctx(RdmaDeviceResources 
*dev_res, uint32_t cqe_ctx_id) { void **cqe_ctx; - cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id); + cqe_ctx = rdma_res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id); if (!cqe_ctx) { return NULL; } - pr_dbg("ctx=%p\n", *cqe_ctx); - return *cqe_ctx; } @@ -521,12 +502,11 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, { void **cqe_ctx; - cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); + cqe_ctx = rdma_res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); if (!cqe_ctx) { return -ENOMEM; } - pr_dbg("ctx=%p\n", ctx); *cqe_ctx = ctx; return 0; @@ -534,7 +514,7 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) { - res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); + rdma_res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); } int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, @@ -544,7 +524,6 @@ int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, rc = rdma_backend_add_gid(backend_dev, ifname, gid); if (rc) { - pr_dbg("Fail to add gid\n"); return -EINVAL; } @@ -565,7 +544,6 @@ int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, rc = rdma_backend_del_gid(backend_dev, ifname, &dev_res->port.gid_tbl[gid_idx].gid); if (rc) { - pr_dbg("Fail to delete gid\n"); return -EINVAL; } @@ -580,7 +558,7 @@ int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, int sgid_idx) { if (unlikely(sgid_idx < 0 || sgid_idx >= MAX_PORT_GIDS)) { - pr_dbg("Got invalid sgid_idx %d\n", sgid_idx); + rdma_error_report("Got invalid sgid_idx %d", sgid_idx); return -EINVAL; } @@ -590,9 +568,6 @@ int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res, &dev_res->port.gid_tbl[sgid_idx].gid); } - pr_dbg("backend_gid_index=%d\n", - dev_res->port.gid_tbl[sgid_idx].backend_gid_index); - return 
dev_res->port.gid_tbl[sgid_idx].backend_gid_index; } @@ -624,8 +599,7 @@ static void fini_ports(RdmaDeviceResources *dev_res, } } -int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, - Error **errp) +int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr) { dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal, destroy_qp_hash_key, NULL); diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index 3c602c04c0..f9b2ec5076 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -20,8 +20,8 @@ #include "rdma_backend_defs.h" #include "rdma_rm_defs.h" -int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr, - Error **errp); +int rdma_rm_init(RdmaDeviceResources *dev_res, + struct ibv_device_attr *dev_attr); void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, const char *ifname); @@ -31,9 +31,9 @@ RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle); void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle); int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, - uint64_t guest_start, size_t guest_length, void *host_virt, - int access_flags, uint32_t *mr_handle, uint32_t *lkey, - uint32_t *rkey); + uint64_t guest_start, uint64_t guest_length, + void *host_virt, int access_flags, uint32_t *mr_handle, + uint32_t *lkey, uint32_t *rkey); RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle); void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle); diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c index 4fbea8cde2..b9f07fcda7 100644 --- a/hw/rdma/rdma_utils.c +++ b/hw/rdma/rdma_utils.c @@ -14,26 +14,23 @@ */ #include "qemu/osdep.h" +#include "trace.h" #include "rdma_utils.h" -#ifdef PVRDMA_DEBUG -unsigned long pr_dbg_cnt; -#endif - void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen) { void *p; hwaddr len = plen; if (!addr) { - pr_dbg("addr 
is NULL\n"); + rdma_error_report("addr is NULL"); return NULL; } p = pci_dma_map(dev, addr, &len, DMA_DIRECTION_TO_DEVICE); if (!p) { - pr_dbg("Fail in pci_dma_map, addr=0x%" PRIx64 ", len=%" PRId64 "\n", - addr, len); + rdma_error_report("pci_dma_map fail, addr=0x%"PRIx64", len=%"PRId64, + addr, len); return NULL; } @@ -42,14 +39,14 @@ void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen) return NULL; } - pr_dbg("0x%" PRIx64 " -> %p (len=% " PRId64 ")\n", addr, p, len); + trace_rdma_pci_dma_map(addr, p, len); return p; } void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len) { - pr_dbg("%p\n", buffer); + trace_rdma_pci_dma_unmap(buffer); if (buffer) { pci_dma_unmap(dev, buffer, len, DMA_DIRECTION_TO_DEVICE, 0); } diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h index 4490ea0b94..acd148837f 100644 --- a/hw/rdma/rdma_utils.h +++ b/hw/rdma/rdma_utils.h @@ -17,48 +17,17 @@ #ifndef RDMA_UTILS_H #define RDMA_UTILS_H +#include "qemu/error-report.h" #include "hw/pci/pci.h" #include "sysemu/dma.h" #include "stdio.h" -#define pr_info(fmt, ...) \ - fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\ - ## __VA_ARGS__) - -#define pr_err(fmt, ...) \ - fprintf(stderr, "%s: Error at %-20s (%3d): " fmt, "rdma", __func__, \ - __LINE__, ## __VA_ARGS__) - -#ifdef PVRDMA_DEBUG -extern unsigned long pr_dbg_cnt; - -#define init_pr_dbg(void) \ -{ \ - pr_dbg_cnt = 0; \ -} - -#define pr_dbg(fmt, ...) \ - fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \ - __func__, __LINE__, ## __VA_ARGS__) - -#define pr_dbg_buf(title, buf, len) \ -{ \ - int i; \ - char *b = g_malloc0(len * 3 + 1); \ - char b1[4]; \ - for (i = 0; i < len; i++) { \ - sprintf(b1, "%.2X ", buf[i] & 0x000000FF); \ - strcat(b, b1); \ - } \ - pr_dbg("%s (%d): %s\n", title, len, b); \ - g_free(b); \ -} - -#else -#define init_pr_dbg(void) -#define pr_dbg(fmt, ...) -#define pr_dbg_buf(title, buf, len) -#endif +#define rdma_error_report(fmt, ...) 
\ + error_report("%s: " fmt, "rdma", ## __VA_ARGS__) +#define rdma_warn_report(fmt, ...) \ + warn_report("%s: " fmt, "rdma", ## __VA_ARGS__) +#define rdma_info_report(fmt, ...) \ + info_report("%s: " fmt, "rdma", ## __VA_ARGS__) void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen); void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len); diff --git a/hw/rdma/trace-events b/hw/rdma/trace-events index c4c202e647..0fad56c882 100644 --- a/hw/rdma/trace-events +++ b/hw/rdma/trace-events @@ -1,5 +1,31 @@ # See docs/tracing.txt for syntax documentation. -#hw/rdma/rdma_backend.c -create_ah_cache_hit(uint64_t subnet, uint64_t net_id) "subnet = 0x%"PRIx64" net_id = 0x%"PRIx64 -create_ah_cache_miss(uint64_t subnet, uint64_t net_id) "subnet = 0x%"PRIx64" net_id = 0x%"PRIx64 +# hw/rdma/rdma_backend.c +rdma_check_dev_attr(const char *name, int max_bk, int max_fe) "%s: be=%d, fe=%d" +rdma_create_ah_cache_hit(uint64_t subnet, uint64_t if_id) "subnet=0x%"PRIx64",if_id=0x%"PRIx64 +rdma_create_ah_cache_miss(uint64_t subnet, uint64_t if_id) "subnet=0x%"PRIx64",if_id=0x%"PRIx64 +rdma_poll_cq(int ne, void *ibcq) "Got %d completion(s) from cq %p" +rdmacm_mux(const char *title, int msg_type, int op_code) "%s: msg_type=%d, op_code=%d" +rdmacm_mux_check_op_status(int msg_type, int op_code, int err_code) "resp: msg_type=%d, op_code=%d, err_code=%d" +rdma_mad_message(const char *title, int len, char *data) "mad %s (%d): %s" +rdma_backend_rc_qp_state_init(uint32_t qpn) "RC QP 0x%x switch to INIT" +rdma_backend_ud_qp_state_init(uint32_t qpn, uint32_t qkey) "UD QP 0x%x switch to INIT, qkey=0x%x" +rdma_backend_rc_qp_state_rtr(uint32_t qpn, uint64_t subnet, uint64_t ifid, uint8_t sgid_idx, uint32_t dqpn, uint32_t rq_psn) "RC QP 0x%x switch to RTR, subnet = 0x%"PRIx64", ifid = 0x%"PRIx64 ", sgid_idx=%d, dqpn=0x%x, rq_psn=0x%x" +rdma_backend_ud_qp_state_rtr(uint32_t qpn, uint32_t qkey) "UD QP 0x%x switch to RTR, qkey=0x%x" +rdma_backend_rc_qp_state_rts(uint32_t qpn, 
uint32_t sq_psn) "RC QP 0x%x switch to RTS, sq_psn=0x%x, " +rdma_backend_ud_qp_state_rts(uint32_t qpn, uint32_t sq_psn, uint32_t qkey) "UD QP 0x%x switch to RTS, sq_psn=0x%x, qkey=0x%x" +rdma_backend_get_gid_index(uint64_t subnet, uint64_t ifid, int gid_idx) "subnet=0x%"PRIx64", ifid=0x%"PRIx64 ", gid_idx=%d" +rdma_backend_gid_change(const char *op, uint64_t subnet, uint64_t ifid) "%s subnet=0x%"PRIx64", ifid=0x%"PRIx64 + +# hw/rdma/rdma_rm.c +rdma_res_tbl_get(char *name, uint32_t handle) "tbl %s, handle %d" +rdma_res_tbl_alloc(char *name, uint32_t handle) "tbl %s, handle %d" +rdma_res_tbl_dealloc(char *name, uint32_t handle) "tbl %s, handle %d" +rdma_rm_alloc_mr(uint32_t mr_handle, void *host_virt, uint64_t guest_start, uint64_t guest_length, int access_flags) "mr_handle=%d, host_virt=%p, guest_start=0x%"PRIx64", length=%" PRId64", access_flags=0x%x" +rdma_rm_dealloc_mr(uint32_t mr_handle, uint64_t guest_start) "mr_handle=%d, guest_start=0x%"PRIx64 +rdma_rm_alloc_qp(uint32_t rm_qpn, uint32_t backend_qpn, uint8_t qp_type) "rm_qpn=%d, backend_qpn=0x%x, qp_type=%d" +rdma_rm_modify_qp(uint32_t qpn, uint32_t attr_mask, int qp_state, uint8_t sgid_idx) "qpn=0x%x, attr_mask=0x%x, qp_state=%d, sgid_idx=%d" + +# hw/rdma/rdma_utils.c +rdma_pci_dma_map(uint64_t addr, void *vaddr, uint64_t len) "0x%"PRIx64" -> %p (len=%" PRId64")" +rdma_pci_dma_unmap(void *vaddr) "%p" diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h index ffae36986e..0879224957 100644 --- a/hw/rdma/vmw/pvrdma.h +++ b/hw/rdma/vmw/pvrdma.h @@ -127,6 +127,6 @@ static inline void post_interrupt(PVRDMADev *dev, unsigned vector) } } -int execute_command(PVRDMADev *dev); +int pvrdma_exec_cmd(PVRDMADev *dev); #endif diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 89920887bf..21a55e225a 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -14,7 +14,6 @@ */ #include "qemu/osdep.h" -#include "qemu/error-report.h" #include "cpu.h" #include "hw/hw.h" #include "hw/pci/pci.h" 
@@ -24,6 +23,7 @@ #include "../rdma_rm.h" #include "../rdma_utils.h" +#include "trace.h" #include "pvrdma.h" #include "standard-headers/rdma/vmw_pvrdma-abi.h" @@ -35,40 +35,38 @@ static void *pvrdma_map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma, void *host_virt = NULL, *curr_page; if (!nchunks) { - pr_dbg("nchunks=0\n"); + rdma_error_report("Got nchunks=0"); return NULL; } dir = rdma_pci_dma_map(pdev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { - error_report("PVRDMA: Failed to map to page directory"); + rdma_error_report("Failed to map to page directory"); return NULL; } tbl = rdma_pci_dma_map(pdev, dir[0], TARGET_PAGE_SIZE); if (!tbl) { - error_report("PVRDMA: Failed to map to page table 0"); + rdma_error_report("Failed to map to page table 0"); goto out_unmap_dir; } curr_page = rdma_pci_dma_map(pdev, (dma_addr_t)tbl[0], TARGET_PAGE_SIZE); if (!curr_page) { - error_report("PVRDMA: Failed to map the first page"); + rdma_error_report("Failed to map the page 0"); goto out_unmap_tbl; } host_virt = mremap(curr_page, 0, length, MREMAP_MAYMOVE); - pr_dbg("mremap %p -> %p\n", curr_page, host_virt); if (host_virt == MAP_FAILED) { host_virt = NULL; - error_report("PVRDMA: Failed to remap memory for host_virt"); + rdma_error_report("Failed to remap memory for host_virt"); goto out_unmap_tbl; } + trace_pvrdma_map_to_pdir_host_virt(curr_page, host_virt); rdma_pci_dma_unmap(pdev, curr_page, TARGET_PAGE_SIZE); - pr_dbg("host_virt=%p\n", host_virt); - dir_idx = 0; tbl_idx = 1; addr_idx = 1; @@ -76,28 +74,28 @@ static void *pvrdma_map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma, if (tbl_idx == TARGET_PAGE_SIZE / sizeof(uint64_t)) { tbl_idx = 0; dir_idx++; - pr_dbg("Mapping to table %d\n", dir_idx); rdma_pci_dma_unmap(pdev, tbl, TARGET_PAGE_SIZE); tbl = rdma_pci_dma_map(pdev, dir[dir_idx], TARGET_PAGE_SIZE); if (!tbl) { - error_report("PVRDMA: Failed to map to page table %d", dir_idx); + rdma_error_report("Failed to map to page table %d", dir_idx); goto out_unmap_host_virt; } } - 
pr_dbg("guest_dma[%d]=0x%" PRIx64 "\n", addr_idx, tbl[tbl_idx]); - curr_page = rdma_pci_dma_map(pdev, (dma_addr_t)tbl[tbl_idx], TARGET_PAGE_SIZE); if (!curr_page) { - error_report("PVRDMA: Failed to map to page %d, dir %d", tbl_idx, - dir_idx); + rdma_error_report("Failed to map to page %d, dir %d", tbl_idx, + dir_idx); goto out_unmap_host_virt; } mremap(curr_page, 0, TARGET_PAGE_SIZE, MREMAP_MAYMOVE | MREMAP_FIXED, host_virt + TARGET_PAGE_SIZE * addr_idx); + trace_pvrdma_map_to_pdir_next_page(addr_idx, curr_page, host_virt + + TARGET_PAGE_SIZE * addr_idx); + rdma_pci_dma_unmap(pdev, curr_page, TARGET_PAGE_SIZE); addr_idx++; @@ -127,7 +125,6 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_query_port_resp *resp = &rsp->query_port_resp; struct pvrdma_port_attr attrs = {0}; - pr_dbg("port=%d\n", cmd->port_num); if (cmd->port_num > MAX_PORTS) { return -EINVAL; } @@ -159,12 +156,10 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_query_pkey *cmd = &req->query_pkey; struct pvrdma_cmd_query_pkey_resp *resp = &rsp->query_pkey_resp; - pr_dbg("port=%d\n", cmd->port_num); if (cmd->port_num > MAX_PORTS) { return -EINVAL; } - pr_dbg("index=%d\n", cmd->index); if (cmd->index > MAX_PKEYS) { return -EINVAL; } @@ -172,7 +167,6 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req, memset(resp, 0, sizeof(*resp)); resp->pkey = PVRDMA_PKEY; - pr_dbg("pkey=0x%x\n", resp->pkey); return 0; } @@ -184,8 +178,6 @@ static int create_pd(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_create_pd_resp *resp = &rsp->create_pd_resp; int rc; - pr_dbg("context=0x%x\n", cmd->ctx_handle ? 
cmd->ctx_handle : 0); - memset(resp, 0, sizeof(*resp)); rc = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev, &resp->pd_handle, cmd->ctx_handle); @@ -198,8 +190,6 @@ static int destroy_pd(PVRDMADev *dev, union pvrdma_cmd_req *req, { struct pvrdma_cmd_destroy_pd *cmd = &req->destroy_pd; - pr_dbg("pd_handle=%d\n", cmd->pd_handle); - rdma_rm_dealloc_pd(&dev->rdma_dev_res, cmd->pd_handle); return 0; @@ -216,15 +206,11 @@ static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req, memset(resp, 0, sizeof(*resp)); - pr_dbg("pd_handle=%d\n", cmd->pd_handle); - pr_dbg("access_flags=0x%x\n", cmd->access_flags); - pr_dbg("flags=0x%x\n", cmd->flags); - if (!(cmd->flags & PVRDMA_MR_FLAG_DMA)) { host_virt = pvrdma_map_to_pdir(pci_dev, cmd->pdir_dma, cmd->nchunks, cmd->length); if (!host_virt) { - pr_dbg("Failed to map to pdir\n"); + rdma_error_report("Failed to map to pdir"); return -EINVAL; } } @@ -244,8 +230,6 @@ static int destroy_mr(PVRDMADev *dev, union pvrdma_cmd_req *req, { struct pvrdma_cmd_destroy_mr *cmd = &req->destroy_mr; - pr_dbg("mr_handle=%d\n", cmd->mr_handle); - rdma_rm_dealloc_mr(&dev->rdma_dev_res, cmd->mr_handle); return 0; @@ -260,20 +244,19 @@ static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring, char ring_name[MAX_RING_NAME_SZ]; if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) { - pr_dbg("invalid nchunks: %d\n", nchunks); + rdma_error_report("Got invalid nchunks: %d", nchunks); return rc; } - pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma); dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { - pr_dbg("Failed to map to CQ page directory\n"); + rdma_error_report("Failed to map to CQ page directory"); goto out; } tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); if (!tbl) { - pr_dbg("Failed to map to CQ page table\n"); + rdma_error_report("Failed to map to CQ page table"); goto out; } @@ -284,7 +267,7 @@ static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring, 
rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); if (!r->ring_state) { - pr_dbg("Failed to map to CQ ring state\n"); + rdma_error_report("Failed to map to CQ ring state"); goto out_free_ring; } @@ -339,8 +322,6 @@ static int create_cq(PVRDMADev *dev, union pvrdma_cmd_req *req, return rc; } - pr_dbg("ring=%p\n", ring); - rc = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev, cmd->cqe, &resp->cq_handle, ring); if (rc) { @@ -359,11 +340,9 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req, RdmaRmCQ *cq; PvrdmaRing *ring; - pr_dbg("cq_handle=%d\n", cmd->cq_handle); - cq = rdma_rm_get_cq(&dev->rdma_dev_res, cmd->cq_handle); if (!cq) { - pr_dbg("Invalid CQ handle\n"); + rdma_error_report("Got invalid CQ handle"); return -EINVAL; } @@ -388,42 +367,33 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, if (!spages || spages > PVRDMA_MAX_FAST_REG_PAGES || !rpages || rpages > PVRDMA_MAX_FAST_REG_PAGES) { - pr_dbg("invalid pages: %d, %d\n", spages, rpages); + rdma_error_report("Got invalid page count for QP ring: %d, %d", spages, + rpages); return rc; } - pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma); dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { - pr_dbg("Failed to map to CQ page directory\n"); + rdma_error_report("Failed to map to CQ page directory"); goto out; } tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); if (!tbl) { - pr_dbg("Failed to map to CQ page table\n"); + rdma_error_report("Failed to map to CQ page table"); goto out; } sr = g_malloc(2 * sizeof(*rr)); rr = &sr[1]; - pr_dbg("sring=%p\n", sr); - pr_dbg("rring=%p\n", rr); *rings = sr; - pr_dbg("scqe=%d\n", scqe); - pr_dbg("smax_sge=%d\n", smax_sge); - pr_dbg("spages=%d\n", spages); - pr_dbg("rcqe=%d\n", rcqe); - pr_dbg("rmax_sge=%d\n", rmax_sge); - pr_dbg("rpages=%d\n", rpages); - /* Create send ring */ sr->ring_state = (struct pvrdma_ring *) rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); if (!sr->ring_state) { - 
pr_dbg("Failed to map to CQ ring state\n"); + rdma_error_report("Failed to map to CQ ring state"); goto out_free_sr_mem; } @@ -468,9 +438,7 @@ out: static void destroy_qp_rings(PvrdmaRing *ring) { - pr_dbg("sring=%p\n", &ring[0]); pvrdma_ring_free(&ring[0]); - pr_dbg("rring=%p\n", &ring[1]); pvrdma_ring_free(&ring[1]); rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); @@ -487,9 +455,6 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, memset(resp, 0, sizeof(*resp)); - pr_dbg("total_chunks=%d\n", cmd->total_chunks); - pr_dbg("send_chunks=%d\n", cmd->send_chunks); - rc = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings, cmd->max_send_wr, cmd->max_send_sge, cmd->send_chunks, cmd->max_recv_wr, cmd->max_recv_sge, @@ -498,8 +463,6 @@ static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, return rc; } - pr_dbg("rings=%p\n", rings); - rc = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle, cmd->qp_type, cmd->max_send_wr, cmd->max_send_sge, cmd->send_cq_handle, cmd->max_recv_wr, @@ -525,8 +488,6 @@ static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_modify_qp *cmd = &req->modify_qp; int rc; - pr_dbg("qp_handle=%d\n", cmd->qp_handle); - memset(rsp, 0, sizeof(*rsp)); /* No need to verify sgid_index since it is u8 */ @@ -551,9 +512,6 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, struct ibv_qp_init_attr init_attr; int rc; - pr_dbg("qp_handle=%d\n", cmd->qp_handle); - pr_dbg("attr_mask=0x%x\n", cmd->attr_mask); - memset(rsp, 0, sizeof(*rsp)); rc = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle, @@ -572,7 +530,6 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, qp = rdma_rm_get_qp(&dev->rdma_dev_res, cmd->qp_handle); if (!qp) { - pr_dbg("Invalid QP handle\n"); return -EINVAL; } @@ -591,16 +548,10 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, int rc; union ibv_gid *gid = (union ibv_gid *)&cmd->new_gid; - 
pr_dbg("index=%d\n", cmd->index); - if (cmd->index >= MAX_PORT_GIDS) { return -EINVAL; } - pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index, - (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix), - (long long unsigned int)be64_to_cpu(gid->global.interface_id)); - rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev, dev->backend_eth_device_name, gid, cmd->index); @@ -614,8 +565,6 @@ static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind; - pr_dbg("index=%d\n", cmd->index); - if (cmd->index >= MAX_PORT_GIDS) { return -EINVAL; } @@ -633,8 +582,6 @@ static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_create_uc_resp *resp = &rsp->create_uc_resp; int rc; - pr_dbg("pfn=%d\n", cmd->pfn); - memset(resp, 0, sizeof(*resp)); rc = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn, &resp->ctx_handle); @@ -646,8 +593,6 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, { struct pvrdma_cmd_destroy_uc *cmd = &req->destroy_uc; - pr_dbg("ctx_handle=%d\n", cmd->ctx_handle); - rdma_rm_dealloc_uc(&dev->rdma_dev_res, cmd->ctx_handle); return 0; @@ -680,22 +625,21 @@ static struct cmd_handler cmd_handlers[] = { {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind}, }; -int execute_command(PVRDMADev *dev) +int pvrdma_exec_cmd(PVRDMADev *dev) { int err = 0xFFFF; DSRInfo *dsr_info; dsr_info = &dev->dsr_info; - pr_dbg("cmd=%d\n", dsr_info->req->hdr.cmd); if (dsr_info->req->hdr.cmd >= sizeof(cmd_handlers) / sizeof(struct cmd_handler)) { - pr_dbg("Unsupported command\n"); + rdma_error_report("Unsupported command"); goto out; } if (!cmd_handlers[dsr_info->req->hdr.cmd].exec) { - pr_dbg("Unsupported command (not implemented yet)\n"); + rdma_error_report("Unsupported command (not implemented yet)"); goto out; } @@ -704,7 +648,8 @@ int execute_command(PVRDMADev *dev) dsr_info->rsp->hdr.response = dsr_info->req->hdr.response; dsr_info->rsp->hdr.ack = 
cmd_handlers[dsr_info->req->hdr.cmd].ack; dsr_info->rsp->hdr.err = err < 0 ? -err : 0; - pr_dbg("rsp->hdr.err=%d\n", dsr_info->rsp->hdr.err); + + trace_pvrdma_exec_cmd(dsr_info->req->hdr.cmd, dsr_info->rsp->hdr.err); out: set_reg_val(dev, PVRDMA_REG_ERR, err); diff --git a/hw/rdma/vmw/pvrdma_dev_ring.c b/hw/rdma/vmw/pvrdma_dev_ring.c index e8e5b502f6..d7bc7f5ccc 100644 --- a/hw/rdma/vmw/pvrdma_dev_ring.c +++ b/hw/rdma/vmw/pvrdma_dev_ring.c @@ -17,6 +17,8 @@ #include "hw/pci/pci.h" #include "cpu.h" +#include "trace.h" + #include "../rdma_utils.h" #include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h" #include "pvrdma_dev_ring.h" @@ -30,13 +32,10 @@ int pvrdma_ring_init(PvrdmaRing *ring, const char *name, PCIDevice *dev, strncpy(ring->name, name, MAX_RING_NAME_SZ); ring->name[MAX_RING_NAME_SZ - 1] = 0; - pr_dbg("Initializing %s ring\n", ring->name); ring->dev = dev; ring->ring_state = ring_state; ring->max_elems = max_elems; ring->elem_sz = elem_sz; - pr_dbg("ring->elem_sz=%zu\n", ring->elem_sz); - pr_dbg("npages=%d\n", npages); /* TODO: Give a moment to think if we want to redo driver settings atomic_set(&ring->ring_state->prod_tail, 0); atomic_set(&ring->ring_state->cons_head, 0); @@ -46,14 +45,14 @@ int pvrdma_ring_init(PvrdmaRing *ring, const char *name, PCIDevice *dev, for (i = 0; i < npages; i++) { if (!tbl[i]) { - pr_err("npages=%ld but tbl[%d] is NULL\n", (long)npages, i); + rdma_error_report("npages=%d but tbl[%d] is NULL", npages, i); continue; } ring->pages[i] = rdma_pci_dma_map(dev, tbl[i], TARGET_PAGE_SIZE); if (!ring->pages[i]) { rc = -ENOMEM; - pr_dbg("Failed to map to page %d\n", i); + rdma_error_report("Failed to map to page %d in ring %s", i, name); goto out_free; } memset(ring->pages[i], 0, TARGET_PAGE_SIZE); @@ -78,7 +77,7 @@ void *pvrdma_ring_next_elem_read(PvrdmaRing *ring) e = pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems, &idx); if (e <= 0) { - pr_dbg("No more data in ring\n"); + 
trace_pvrdma_ring_next_elem_read_no_data(ring->name); return NULL; } @@ -89,11 +88,6 @@ void *pvrdma_ring_next_elem_read(PvrdmaRing *ring) void pvrdma_ring_read_inc(PvrdmaRing *ring) { pvrdma_idx_ring_inc(&ring->ring_state->cons_head, ring->max_elems); - /* - pr_dbg("%s: t=%d, h=%d, m=%ld\n", ring->name, - ring->ring_state->prod_tail, ring->ring_state->cons_head, - ring->max_elems); - */ } void *pvrdma_ring_next_elem_write(PvrdmaRing *ring) @@ -103,13 +97,13 @@ void *pvrdma_ring_next_elem_write(PvrdmaRing *ring) idx = pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail); if (idx <= 0) { - pr_dbg("CQ is full\n"); + rdma_error_report("CQ is full"); return NULL; } idx = pvrdma_idx(&ring->ring_state->prod_tail, ring->max_elems); if (idx < 0 || tail != idx) { - pr_dbg("invalid idx\n"); + rdma_error_report("Invalid idx %d", idx); return NULL; } @@ -120,11 +114,6 @@ void *pvrdma_ring_next_elem_write(PvrdmaRing *ring) void pvrdma_ring_write_inc(PvrdmaRing *ring) { pvrdma_idx_ring_inc(&ring->ring_state->prod_tail, ring->max_elems); - /* - pr_dbg("%s: t=%d, h=%d, m=%ld\n", ring->name, - ring->ring_state->prod_tail, ring->ring_state->cons_head, - ring->max_elems); - */ } void pvrdma_ring_free(PvrdmaRing *ring) @@ -137,7 +126,6 @@ void pvrdma_ring_free(PvrdmaRing *ring) return; } - pr_dbg("ring->npages=%d\n", ring->npages); while (ring->npages--) { rdma_pci_dma_unmap(ring->dev, ring->pages[ring->npages], TARGET_PAGE_SIZE); diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index d2bdb5ba8c..81ae08bd8d 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -69,25 +69,22 @@ static int init_dev_ring(PvrdmaRing *ring, struct pvrdma_ring **ring_state, uint64_t *dir, *tbl; int rc = 0; - pr_dbg("Initializing device ring %s\n", name); - pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)dir_addr); - pr_dbg("num_pages=%d\n", num_pages); dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE); if (!dir) { - pr_err("Failed to map to 
page directory\n"); + rdma_error_report("Failed to map to page directory (ring %s)", name); rc = -ENOMEM; goto out; } tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); if (!tbl) { - pr_err("Failed to map to page table\n"); + rdma_error_report("Failed to map to page table (ring %s)", name); rc = -ENOMEM; goto out_free_dir; } *ring_state = rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); if (!*ring_state) { - pr_err("Failed to map to ring state\n"); + rdma_error_report("Failed to map to ring state (ring %s)", name); rc = -ENOMEM; goto out_free_tbl; } @@ -100,7 +97,6 @@ static int init_dev_ring(PvrdmaRing *ring, struct pvrdma_ring **ring_state, sizeof(struct pvrdma_cqne), (dma_addr_t *)&tbl[1], (dma_addr_t)num_pages - 1); if (rc) { - pr_err("Failed to initialize ring\n"); rc = -ENOMEM; goto out_free_ring_state; } @@ -155,11 +151,10 @@ static int load_dsr(PVRDMADev *dev) free_dsr(dev); /* Map to DSR */ - pr_dbg("dsr_dma=0x%llx\n", (long long unsigned int)dev->dsr_info.dma); dev->dsr_info.dsr = rdma_pci_dma_map(pci_dev, dev->dsr_info.dma, sizeof(struct pvrdma_device_shared_region)); if (!dev->dsr_info.dsr) { - pr_err("Failed to map to DSR\n"); + rdma_error_report("Failed to map to DSR"); rc = -ENOMEM; goto out; } @@ -169,21 +164,19 @@ static int load_dsr(PVRDMADev *dev) dsr = dsr_info->dsr; /* Map to command slot */ - pr_dbg("cmd_dma=0x%llx\n", (long long unsigned int)dsr->cmd_slot_dma); dsr_info->req = rdma_pci_dma_map(pci_dev, dsr->cmd_slot_dma, sizeof(union pvrdma_cmd_req)); if (!dsr_info->req) { - pr_err("Failed to map to command slot address\n"); + rdma_error_report("Failed to map to command slot address"); rc = -ENOMEM; goto out_free_dsr; } /* Map to response slot */ - pr_dbg("rsp_dma=0x%llx\n", (long long unsigned int)dsr->resp_slot_dma); dsr_info->rsp = rdma_pci_dma_map(pci_dev, dsr->resp_slot_dma, sizeof(union pvrdma_cmd_resp)); if (!dsr_info->rsp) { - pr_err("Failed to map to response slot address\n"); + rdma_error_report("Failed to map to 
response slot address"); rc = -ENOMEM; goto out_free_req; } @@ -193,7 +186,6 @@ static int load_dsr(PVRDMADev *dev) pci_dev, dsr->cq_ring_pages.pdir_dma, dsr->cq_ring_pages.num_pages); if (rc) { - pr_err("Failed to map to initialize CQ ring\n"); rc = -ENOMEM; goto out_free_rsp; } @@ -203,7 +195,6 @@ static int load_dsr(PVRDMADev *dev) "dev_async", pci_dev, dsr->async_ring_pages.pdir_dma, dsr->async_ring_pages.num_pages); if (rc) { - pr_err("Failed to map to initialize event ring\n"); rc = -ENOMEM; goto out_free_rsp; } @@ -230,24 +221,15 @@ static void init_dsr_dev_caps(PVRDMADev *dev) struct pvrdma_device_shared_region *dsr; if (dev->dsr_info.dsr == NULL) { - pr_err("Can't initialized DSR\n"); + rdma_error_report("Can't initialized DSR"); return; } dsr = dev->dsr_info.dsr; - dsr->caps.fw_ver = PVRDMA_FW_VERSION; - pr_dbg("fw_ver=0x%" PRIx64 "\n", dsr->caps.fw_ver); - dsr->caps.mode = PVRDMA_DEVICE_MODE_ROCE; - pr_dbg("mode=%d\n", dsr->caps.mode); - dsr->caps.gid_types |= PVRDMA_GID_TYPE_FLAG_ROCE_V1; - pr_dbg("gid_types=0x%x\n", dsr->caps.gid_types); - dsr->caps.max_uar = RDMA_BAR2_UAR_SIZE; - pr_dbg("max_uar=%d\n", dsr->caps.max_uar); - dsr->caps.max_mr_size = dev->dev_attr.max_mr_size; dsr->caps.max_qp = dev->dev_attr.max_qp; dsr->caps.max_qp_wr = dev->dev_attr.max_qp_wr; @@ -257,23 +239,11 @@ static void init_dsr_dev_caps(PVRDMADev *dev) dsr->caps.max_mr = dev->dev_attr.max_mr; dsr->caps.max_pd = dev->dev_attr.max_pd; dsr->caps.max_ah = dev->dev_attr.max_ah; - dsr->caps.gid_tbl_len = MAX_GIDS; - pr_dbg("gid_tbl_len=%d\n", dsr->caps.gid_tbl_len); - dsr->caps.sys_image_guid = 0; - pr_dbg("sys_image_guid=%" PRIx64 "\n", dsr->caps.sys_image_guid); - dsr->caps.node_guid = dev->node_guid; - pr_dbg("node_guid=%" PRIx64 "\n", be64_to_cpu(dsr->caps.node_guid)); - dsr->caps.phys_port_cnt = MAX_PORTS; - pr_dbg("phys_port_cnt=%d\n", dsr->caps.phys_port_cnt); - dsr->caps.max_pkeys = MAX_PKEYS; - pr_dbg("max_pkeys=%d\n", dsr->caps.max_pkeys); - - pr_dbg("Initialized\n"); } 
static void uninit_msix(PCIDevice *pdev, int used_vectors) @@ -288,7 +258,7 @@ static void uninit_msix(PCIDevice *pdev, int used_vectors) msix_uninit(pdev, &dev->msix, &dev->msix); } -static int init_msix(PCIDevice *pdev, Error **errp) +static int init_msix(PCIDevice *pdev) { PVRDMADev *dev = PVRDMA_DEV(pdev); int i; @@ -299,14 +269,14 @@ static int init_msix(PCIDevice *pdev, Error **errp) RDMA_MSIX_PBA, 0, NULL); if (rc < 0) { - error_setg(errp, "Failed to initialize MSI-X"); + rdma_error_report("Failed to initialize MSI-X"); return rc; } for (i = 0; i < RDMA_MAX_INTRS; i++) { rc = msix_vector_use(PCI_DEVICE(dev), i); if (rc < 0) { - error_setg(errp, "Fail mark MSI-X vector %d", i); + rdma_error_report("Fail mark MSI-X vector %d", i); uninit_msix(pdev, i); return rc; } @@ -319,9 +289,6 @@ static void pvrdma_fini(PCIDevice *pdev) { PVRDMADev *dev = PVRDMA_DEV(pdev); - pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); - pvrdma_qp_ops_fini(); rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, @@ -335,8 +302,8 @@ static void pvrdma_fini(PCIDevice *pdev) uninit_msix(pdev, RDMA_MAX_INTRS); } - pr_dbg("Device %s %x.%x is down\n", pdev->name, PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); + rdma_info_report("Device %s %x.%x is down", pdev->name, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); } static void pvrdma_stop(PVRDMADev *dev) @@ -353,32 +320,26 @@ static void activate_device(PVRDMADev *dev) { pvrdma_start(dev); set_reg_val(dev, PVRDMA_REG_ERR, 0); - pr_dbg("Device activated\n"); } static int unquiesce_device(PVRDMADev *dev) { - pr_dbg("Device unquiesced\n"); return 0; } static void reset_device(PVRDMADev *dev) { pvrdma_stop(dev); - - pr_dbg("Device reset complete\n"); } -static uint64_t regs_read(void *opaque, hwaddr addr, unsigned size) +static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size) { PVRDMADev *dev = opaque; uint32_t val; - /* pr_dbg("addr=0x%lx, size=%d\n", addr, size); */ - if 
(get_reg_val(dev, addr, &val)) { - pr_dbg("Error trying to read REG value from address 0x%x\n", - (uint32_t)addr); + rdma_error_report("Failed to read REG value from address 0x%x", + (uint32_t)addr); return -EINVAL; } @@ -387,25 +348,24 @@ static uint64_t regs_read(void *opaque, hwaddr addr, unsigned size) return val; } -static void regs_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) { PVRDMADev *dev = opaque; - /* pr_dbg("addr=0x%lx, val=0x%x, size=%d\n", addr, (uint32_t)val, size); */ - if (set_reg_val(dev, addr, val)) { - pr_err("Fail to set REG value, addr=0x%" PRIx64 ", val=0x%" PRIx64 "\n", - addr, val); + rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64, + addr, val); return; } - trace_pvrdma_regs_write(addr, val); - switch (addr) { case PVRDMA_REG_DSRLOW: + trace_pvrdma_regs_write(addr, val, "DSRLOW", ""); dev->dsr_info.dma = val; break; case PVRDMA_REG_DSRHIGH: + trace_pvrdma_regs_write(addr, val, "DSRHIGH", ""); dev->dsr_info.dma |= val << 32; load_dsr(dev); init_dsr_dev_caps(dev); @@ -413,23 +373,27 @@ static void regs_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) case PVRDMA_REG_CTL: switch (val) { case PVRDMA_DEVICE_CTL_ACTIVATE: + trace_pvrdma_regs_write(addr, val, "CTL", "ACTIVATE"); activate_device(dev); break; case PVRDMA_DEVICE_CTL_UNQUIESCE: + trace_pvrdma_regs_write(addr, val, "CTL", "UNQUIESCE"); unquiesce_device(dev); break; case PVRDMA_DEVICE_CTL_RESET: + trace_pvrdma_regs_write(addr, val, "CTL", "URESET"); reset_device(dev); break; } break; case PVRDMA_REG_IMR: - pr_dbg("Interrupt mask=0x%" PRIx64 "\n", val); + trace_pvrdma_regs_write(addr, val, "INTR_MASK", ""); dev->interrupt_mask = val; break; case PVRDMA_REG_REQUEST: if (val == 0) { - execute_command(dev); + trace_pvrdma_regs_write(addr, val, "REQUEST", ""); + pvrdma_exec_cmd(dev); } break; default: @@ -438,8 +402,8 @@ static void 
regs_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) } static const MemoryRegionOps regs_ops = { - .read = regs_read, - .write = regs_write, + .read = pvrdma_regs_read, + .write = pvrdma_regs_write, .endianness = DEVICE_LITTLE_ENDIAN, .impl = { .min_access_size = sizeof(uint32_t), @@ -447,54 +411,58 @@ static const MemoryRegionOps regs_ops = { }, }; -static uint64_t uar_read(void *opaque, hwaddr addr, unsigned size) +static uint64_t pvrdma_uar_read(void *opaque, hwaddr addr, unsigned size) { return 0xffffffff; } -static void uar_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) { PVRDMADev *dev = opaque; - /* pr_dbg("addr=0x%lx, val=0x%x, size=%d\n", addr, (uint32_t)val, size); */ - switch (addr & 0xFFF) { /* Mask with 0xFFF as each UC gets page */ case PVRDMA_UAR_QP_OFFSET: - pr_dbg("UAR QP command, addr=0x%" PRIx64 ", val=0x%" PRIx64 "\n", - (uint64_t)addr, val); if (val & PVRDMA_UAR_QP_SEND) { + trace_pvrdma_uar_write(addr, val, "QP", "SEND", + val & PVRDMA_UAR_HANDLE_MASK, 0); pvrdma_qp_send(dev, val & PVRDMA_UAR_HANDLE_MASK); } if (val & PVRDMA_UAR_QP_RECV) { + trace_pvrdma_uar_write(addr, val, "QP", "RECV", + val & PVRDMA_UAR_HANDLE_MASK, 0); pvrdma_qp_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); } break; case PVRDMA_UAR_CQ_OFFSET: - /* pr_dbg("UAR CQ cmd, addr=0x%x, val=0x%lx\n", (uint32_t)addr, val); */ if (val & PVRDMA_UAR_CQ_ARM) { + trace_pvrdma_uar_write(addr, val, "CQ", "ARM", + val & PVRDMA_UAR_HANDLE_MASK, + !!(val & PVRDMA_UAR_CQ_ARM_SOL)); rdma_rm_req_notify_cq(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK, !!(val & PVRDMA_UAR_CQ_ARM_SOL)); } if (val & PVRDMA_UAR_CQ_ARM_SOL) { - pr_dbg("UAR_CQ_ARM_SOL (%" PRIx64 ")\n", - val & PVRDMA_UAR_HANDLE_MASK); + trace_pvrdma_uar_write(addr, val, "CQ", "ARMSOL - not supported", 0, + 0); } if (val & PVRDMA_UAR_CQ_POLL) { - pr_dbg("UAR_CQ_POLL (%" PRIx64 ")\n", val & PVRDMA_UAR_HANDLE_MASK); + 
trace_pvrdma_uar_write(addr, val, "CQ", "POLL", + val & PVRDMA_UAR_HANDLE_MASK, 0); pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); } break; default: - pr_err("Unsupported command, addr=0x%" PRIx64 ", val=0x%" PRIx64 "\n", - addr, val); + rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, + addr, val); break; } } static const MemoryRegionOps uar_ops = { - .read = uar_read, - .write = uar_write, + .read = pvrdma_uar_read, + .write = pvrdma_uar_write, .endianness = DEVICE_LITTLE_ENDIAN, .impl = { .min_access_size = sizeof(uint32_t), @@ -551,11 +519,9 @@ static void init_dev_caps(PVRDMADev *dev) (wr_sz + sizeof(struct pvrdma_sge) * dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; /* First page is ring state ^^^^ */ - pr_dbg("max_qp_wr=%d\n", dev->dev_attr.max_qp_wr); dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - TARGET_PAGE_SIZE; /* First page is ring state */ - pr_dbg("max_cqe=%d\n", dev->dev_attr.max_cqe); } static int pvrdma_check_ram_shared(Object *obj, void *opaque) @@ -585,10 +551,8 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) bool ram_shared = false; PCIDevice *func0; - init_pr_dbg(); - - pr_dbg("Initializing device %s %x.%x\n", pdev->name, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + rdma_info_report("Initializing device %s %x.%x", pdev->name, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); if (TARGET_PAGE_SIZE != getpagesize()) { error_setg(errp, "Target page size must be the same as host page size"); @@ -598,8 +562,6 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) func0 = pci_get_function_0(pdev); /* Break if not vmxnet3 device in slot 0 */ if (strcmp(object_get_typename(&func0->qdev.parent_obj), TYPE_VMXNET3)) { - pr_dbg("func0 type is %s\n", - object_get_typename(&func0->qdev.parent_obj)); error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn), TYPE_VMXNET3); return; @@ -626,21 +588,21 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) 
init_regs(pdev); - rc = init_msix(pdev, errp); + rc = init_msix(pdev); if (rc) { goto out; } rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, dev->backend_device_name, dev->backend_port_num, - &dev->dev_attr, &dev->mad_chr, errp); + &dev->dev_attr, &dev->mad_chr); if (rc) { goto out; } init_dev_caps(dev); - rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr, errp); + rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr); if (rc) { goto out; } @@ -656,7 +618,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) out: if (rc) { pvrdma_fini(pdev); - error_append_hint(errp, "Device fail to load\n"); + error_append_hint(errp, "Device failed to load\n"); } } diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index ce5a60e184..16db726dac 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -19,6 +19,8 @@ #include "../rdma_rm.h" #include "../rdma_backend.h" +#include "trace.h" + #include "pvrdma.h" #include "standard-headers/rdma/vmw_pvrdma-abi.h" #include "pvrdma_qp_ops.h" @@ -55,18 +57,14 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle); if (unlikely(!cq)) { - pr_dbg("Invalid cqn %d\n", cq_handle); return -EINVAL; } ring = (PvrdmaRing *)cq->opaque; - pr_dbg("ring=%p\n", ring); /* Step #1: Put CQE on CQ ring */ - pr_dbg("Writing CQE\n"); cqe1 = pvrdma_ring_next_elem_write(ring); if (unlikely(!cqe1)) { - pr_dbg("No CQEs in ring\n"); return -EINVAL; } @@ -80,19 +78,13 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, cqe1->wc_flags = wc->wc_flags; cqe1->vendor_err = wc->vendor_err; - pr_dbg("wr_id=%" PRIx64 "\n", cqe1->wr_id); - pr_dbg("qp=0x%lx\n", cqe1->qp); - pr_dbg("opcode=%d\n", cqe1->opcode); - pr_dbg("status=%d\n", cqe1->status); - pr_dbg("byte_len=%d\n", cqe1->byte_len); - pr_dbg("src_qp=%d\n", cqe1->src_qp); - pr_dbg("wc_flags=%d\n", cqe1->wc_flags); - pr_dbg("vendor_err=%d\n", cqe1->vendor_err); + 
trace_pvrdma_post_cqe(cq_handle, cq->notify, cqe1->wr_id, cqe1->qp, + cqe1->opcode, cqe1->status, cqe1->byte_len, + cqe1->src_qp, cqe1->wc_flags, cqe1->vendor_err); pvrdma_ring_write_inc(ring); /* Step #2: Put CQ number on dsr completion ring */ - pr_dbg("Writing CQNE\n"); cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq); if (unlikely(!cqne)) { return -EINVAL; @@ -101,7 +93,6 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, cqne->info = cq_handle; pvrdma_ring_write_inc(&dev->dsr_info.cq); - pr_dbg("cq->notify=%d\n", cq->notify); if (cq->notify != CNT_CLEAR) { if (cq->notify == CNT_ARM) { cq->notify = CNT_CLEAR; @@ -151,23 +142,17 @@ void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) int sgid_idx; union ibv_gid *sgid; - pr_dbg("qp_handle=0x%x\n", qp_handle); - qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); if (unlikely(!qp)) { - pr_dbg("Invalid qpn\n"); return; } ring = (PvrdmaRing *)qp->opaque; - pr_dbg("sring=%p\n", ring); wqe = (struct PvrdmaSqWqe *)pvrdma_ring_next_elem_read(ring); while (wqe) { CompHandlerCtx *comp_ctx; - pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id); - /* Prepare CQE */ comp_ctx = g_malloc(sizeof(CompHandlerCtx)); comp_ctx->dev = dev; @@ -178,26 +163,25 @@ void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index); if (!sgid) { - pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index); + rdma_error_report("Failed to get gid for idx %d", + wqe->hdr.wr.ud.av.gid_index); complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); continue; } - pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index, - sgid->global.interface_id); sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res, &dev->backend_dev, wqe->hdr.wr.ud.av.gid_index); if (sgid_idx <= 0) { - pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", - wqe->hdr.wr.ud.av.gid_index); + rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d", + 
wqe->hdr.wr.ud.av.gid_index); complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); continue; } if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { - pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge, - dev->dev_attr.max_sge); + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, + dev->dev_attr.max_sge); complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); continue; } @@ -221,23 +205,17 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) PvrdmaRqWqe *wqe; PvrdmaRing *ring; - pr_dbg("qp_handle=0x%x\n", qp_handle); - qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); if (unlikely(!qp)) { - pr_dbg("Invalid qpn\n"); return; } ring = &((PvrdmaRing *)qp->opaque)[1]; - pr_dbg("rring=%p\n", ring); wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring); while (wqe) { CompHandlerCtx *comp_ctx; - pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id); - /* Prepare CQE */ comp_ctx = g_malloc(sizeof(CompHandlerCtx)); comp_ctx->dev = dev; @@ -247,8 +225,8 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) comp_ctx->cqe.opcode = IBV_WC_RECV; if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { - pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge, - dev->dev_attr.max_sge); + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, + dev->dev_attr.max_sge); complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); continue; } @@ -270,7 +248,6 @@ void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) cq = rdma_rm_get_cq(dev_res, cq_handle); if (!cq) { - pr_dbg("Invalid CQ# %d\n", cq_handle); return; } diff --git a/hw/rdma/vmw/trace-events b/hw/rdma/vmw/trace-events index b3f9e2b19f..0122266ad7 100644 --- a/hw/rdma/vmw/trace-events +++ b/hw/rdma/vmw/trace-events @@ -1,5 +1,17 @@ # See docs/tracing.txt for syntax documentation. 
# hw/rdma/vmw/pvrdma_main.c -pvrdma_regs_read(uint64_t addr, uint64_t val) "regs[0x%"PRIx64"] = 0x%"PRIx64 -pvrdma_regs_write(uint64_t addr, uint64_t val) "regs[0x%"PRIx64"] = 0x%"PRIx64 +pvrdma_regs_read(uint64_t addr, uint64_t val) "pvrdma.regs[0x%"PRIx64"]=0x%"PRIx64 +pvrdma_regs_write(uint64_t addr, uint64_t val, const char *reg_name, const char *val_name) "pvrdma.regs[0x%"PRIx64"]=0x%"PRIx64" (%s %s)" +pvrdma_uar_write(uint64_t addr, uint64_t val, const char *reg_name, const char *val_name, int val1, int val2) "uar[0x%"PRIx64"]=0x%"PRIx64" (cls=%s, op=%s, obj=%d, val=%d)" + +# hw/rdma/vmw/pvrdma_cmd.c +pvrdma_map_to_pdir_host_virt(void *vfirst, void *vremaped) "mremap %p -> %p" +pvrdma_map_to_pdir_next_page(int page_idx, void *vnext, void *vremaped) "mremap [%d] %p -> %p" +pvrdma_exec_cmd(int cmd, int err) "cmd=%d, err=%d" + +# hw/rdma/vmw/pvrdma_dev_ring.c +pvrdma_ring_next_elem_read_no_data(char *ring_name) "pvrdma_ring %s is empty" + +# hw/rdma/vmw/pvrdma_qp_ops.c +pvrdma_post_cqe(uint32_t cq_handle, int notify, uint64_t wr_id, uint64_t qpn, uint32_t op_code, uint32_t status, uint32_t byte_len, uint32_t src_qp, uint32_t wc_flags, uint32_t vendor_err) "cq_handle=%d, notify=%d, wr_id=0x%"PRIx64", qpn=0x%"PRIx64", opcode=%d, status=%d, byte_len=%d, src_qp=%d, wc_flags=%d, vendor_err=%d" From b20fc7951084bb77e867d114c916e409c3a6821a Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:06 -0700 Subject: [PATCH 03/18] hw/rdma: Introduce protected qlist To make code more readable move handling of protected list to a rdma_utils Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-3-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 20 +++++-------------- hw/rdma/rdma_backend_defs.h | 8 ++------ hw/rdma/rdma_utils.c | 39 +++++++++++++++++++++++++++++++++++++ hw/rdma/rdma_utils.h | 9 +++++++++ 4 files changed, 55 insertions(+), 21 deletions(-) 
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index d138591c86..37edf42215 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -527,9 +527,7 @@ static unsigned int save_mad_recv_buffer(RdmaBackendDev *backend_dev, bctx->up_ctx = ctx; bctx->sge = *sge; - qemu_mutex_lock(&backend_dev->recv_mads_list.lock); - qlist_append_int(backend_dev->recv_mads_list.list, bctx_id); - qemu_mutex_unlock(&backend_dev->recv_mads_list.lock); + rdma_protected_qlist_append_int64(&backend_dev->recv_mads_list, bctx_id); return 0; } @@ -913,23 +911,19 @@ static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid, static void process_incoming_mad_req(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg) { - QObject *o_ctx_id; unsigned long cqe_ctx_id; BackendCtx *bctx; char *mad; trace_mad_message("recv", msg->umad.mad, msg->umad_len); - qemu_mutex_lock(&backend_dev->recv_mads_list.lock); - o_ctx_id = qlist_pop(backend_dev->recv_mads_list.list); - qemu_mutex_unlock(&backend_dev->recv_mads_list.lock); - if (!o_ctx_id) { + cqe_ctx_id = rdma_protected_qlist_pop_int64(&backend_dev->recv_mads_list); + if (cqe_ctx_id == -ENOENT) { rdma_warn_report("No more free MADs buffers, waiting for a while"); sleep(THR_POLL_TO); return; } - cqe_ctx_id = qnum_get_uint(qobject_to(QNum, o_ctx_id)); bctx = rdma_rm_get_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id); if (unlikely(!bctx)) { rdma_error_report("No matching ctx for req %ld", cqe_ctx_id); @@ -994,8 +988,7 @@ static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be) return -EIO; } - qemu_mutex_init(&backend_dev->recv_mads_list.lock); - backend_dev->recv_mads_list.list = qlist_new(); + rdma_protected_qlist_init(&backend_dev->recv_mads_list); enable_rdmacm_mux_async(backend_dev); @@ -1010,10 +1003,7 @@ static void mad_fini(RdmaBackendDev *backend_dev) { disable_rdmacm_mux_async(backend_dev); qemu_chr_fe_disconnect(backend_dev->rdmacm_mux.chr_be); - if (backend_dev->recv_mads_list.list) { - 
qlist_destroy_obj(QOBJECT(backend_dev->recv_mads_list.list)); - qemu_mutex_destroy(&backend_dev->recv_mads_list.lock); - } + rdma_protected_qlist_destroy(&backend_dev->recv_mads_list); } int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index 15ae8b970e..a8c15b09ab 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -20,6 +20,7 @@ #include "chardev/char-fe.h" #include #include "contrib/rdmacm-mux/rdmacm-mux.h" +#include "rdma_utils.h" typedef struct RdmaDeviceResources RdmaDeviceResources; @@ -30,11 +31,6 @@ typedef struct RdmaBackendThread { bool is_running; /* Set by the thread to report its status */ } RdmaBackendThread; -typedef struct RecvMadList { - QemuMutex lock; - QList *list; -} RecvMadList; - typedef struct RdmaCmMux { CharBackend *chr_be; int can_receive; @@ -48,7 +44,7 @@ typedef struct RdmaBackendDev { struct ibv_context *context; struct ibv_comp_channel *channel; uint8_t port_num; - RecvMadList recv_mads_list; + RdmaProtectedQList recv_mads_list; RdmaCmMux rdmacm_mux; } RdmaBackendDev; diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c index b9f07fcda7..0a8abe572d 100644 --- a/hw/rdma/rdma_utils.c +++ b/hw/rdma/rdma_utils.c @@ -14,6 +14,8 @@ */ #include "qemu/osdep.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qmp/qnum.h" #include "trace.h" #include "rdma_utils.h" @@ -51,3 +53,40 @@ void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len) pci_dma_unmap(dev, buffer, len, DMA_DIRECTION_TO_DEVICE, 0); } } + +void rdma_protected_qlist_init(RdmaProtectedQList *list) +{ + qemu_mutex_init(&list->lock); + list->list = qlist_new(); +} + +void rdma_protected_qlist_destroy(RdmaProtectedQList *list) +{ + if (list->list) { + qlist_destroy_obj(QOBJECT(list->list)); + qemu_mutex_destroy(&list->lock); + list->list = NULL; + } +} + +void rdma_protected_qlist_append_int64(RdmaProtectedQList *list, int64_t value) +{ + 
qemu_mutex_lock(&list->lock); + qlist_append_int(list->list, value); + qemu_mutex_unlock(&list->lock); +} + +int64_t rdma_protected_qlist_pop_int64(RdmaProtectedQList *list) +{ + QObject *obj; + + qemu_mutex_lock(&list->lock); + obj = qlist_pop(list->list); + qemu_mutex_unlock(&list->lock); + + if (!obj) { + return -ENOENT; + } + + return qnum_get_uint(qobject_to(QNum, obj)); +} diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h index acd148837f..a8bf1d4fec 100644 --- a/hw/rdma/rdma_utils.h +++ b/hw/rdma/rdma_utils.h @@ -29,8 +29,17 @@ #define rdma_info_report(fmt, ...) \ info_report("%s: " fmt, "rdma", ## __VA_ARGS__) +typedef struct RdmaProtectedQList { + QemuMutex lock; + QList *list; +} RdmaProtectedQList; + void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen); void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len); +void rdma_protected_qlist_init(RdmaProtectedQList *list); +void rdma_protected_qlist_destroy(RdmaProtectedQList *list); +void rdma_protected_qlist_append_int64(RdmaProtectedQList *list, int64_t value); +int64_t rdma_protected_qlist_pop_int64(RdmaProtectedQList *list); static inline void addrconf_addr_eui48(uint8_t *eui, const char *addr) { From 2cfa95300908f401f5b9bdf3de734cf6228a2722 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:07 -0700 Subject: [PATCH 04/18] hw/rdma: Protect against concurrent execution of poll_cq The function rdma_poll_cq is called from two contexts - the completion handler thread, which senses new completions on the backend channel, and explicitly as a result of the guest issuing a poll_cq command. Add a lock to protect against concurrent executions.
Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-4-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 2 ++ hw/rdma/rdma_rm.c | 4 ++++ hw/rdma/rdma_rm_defs.h | 1 + 3 files changed, 7 insertions(+) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index 37edf42215..18975401d9 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -70,6 +70,7 @@ static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) BackendCtx *bctx; struct ibv_wc wc[2]; + qemu_mutex_lock(&rdma_dev_res->lock); do { ne = ibv_poll_cq(ibcq, ARRAY_SIZE(wc), wc); @@ -89,6 +90,7 @@ static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) g_free(bctx); } } while (ne > 0); + qemu_mutex_unlock(&rdma_dev_res->lock); if (ne < 0) { rdma_error_report("ibv_poll_cq fail, rc=%d, errno=%d", ne, errno); diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 66177b42f5..7ea62a9e60 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -617,12 +617,16 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr) init_ports(dev_res); + qemu_mutex_init(&dev_res->lock); + return 0; } void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, const char *ifname) { + qemu_mutex_destroy(&dev_res->lock); + fini_ports(dev_res, backend_dev, ifname); res_tbl_free(&dev_res->uc_tbl); diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index 0ba61d1838..f0ee1f3072 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -105,6 +105,7 @@ typedef struct RdmaDeviceResources { RdmaRmResTbl cq_tbl; RdmaRmResTbl cqe_ctx_tbl; GHashTable *qp_hash; /* Keeps mapping between real and emulated */ + QemuMutex lock; } RdmaDeviceResources; #endif From c2dd117b38583f89d6a2e4a6dfc6d693990ffc39 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:08 -0700 Subject: [PATCH 05/18] 
hw/pvrdma: Collect debugging statistics Add counters to enable enhanced debugging Signed-off-by: Yuval Shaia Message-Id: <1552300155-25216-5-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 70 +++++++++++++++++++++++++++++---------- hw/rdma/rdma_rm.c | 7 ++++ hw/rdma/rdma_rm_defs.h | 27 ++++++++++++++- hw/rdma/vmw/pvrdma.h | 10 ++++++ hw/rdma/vmw/pvrdma_cmd.c | 2 ++ hw/rdma/vmw/pvrdma_main.c | 8 +++++ 6 files changed, 106 insertions(+), 18 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index 18975401d9..e8af9741b1 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -64,9 +64,9 @@ static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err, comp_handler(ctx, &wc); } -static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) +static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) { - int i, ne; + int i, ne, total_ne = 0; BackendCtx *bctx; struct ibv_wc wc[2]; @@ -89,12 +89,18 @@ static void rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); g_free(bctx); } + total_ne += ne; } while (ne > 0); + atomic_sub(&rdma_dev_res->stats.missing_cqe, total_ne); qemu_mutex_unlock(&rdma_dev_res->lock); if (ne < 0) { rdma_error_report("ibv_poll_cq fail, rc=%d, errno=%d", ne, errno); } + + rdma_dev_res->stats.completions += total_ne; + + return total_ne; } static void *comp_handler_thread(void *arg) @@ -122,6 +128,9 @@ static void *comp_handler_thread(void *arg) while (backend_dev->comp_thread.run) { do { rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS); + if (!rc) { + backend_dev->rdma_dev_res->stats.poll_cq_ppoll_to++; + } } while (!rc && backend_dev->comp_thread.run); if (backend_dev->comp_thread.run) { @@ -138,6 +147,7 @@ static void *comp_handler_thread(void *arg) errno); } +
backend_dev->rdma_dev_res->stats.poll_cq_from_bk++; rdma_poll_cq(backend_dev->rdma_dev_res, ev_cq); ibv_ack_cq_events(ev_cq, 1); @@ -271,7 +281,13 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev, void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq) { - rdma_poll_cq(rdma_dev_res, cq->ibcq); + int polled; + + rdma_dev_res->stats.poll_cq_from_guest++; + polled = rdma_poll_cq(rdma_dev_res, cq->ibcq); + if (!polled) { + rdma_dev_res->stats.poll_cq_from_guest_empty++; + } } static GHashTable *ah_hash; @@ -333,7 +349,7 @@ static void ah_cache_init(void) static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, struct ibv_sge *dsge, struct ibv_sge *ssge, - uint8_t num_sge) + uint8_t num_sge, uint64_t *total_length) { RdmaRmMR *mr; int ssge_idx; @@ -349,6 +365,8 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res, dsge->length = ssge[ssge_idx].length; dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr); + *total_length += dsge->length; + dsge++; } @@ -445,8 +463,10 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge); if (rc) { complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx); + backend_dev->rdma_dev_res->stats.mad_tx_err++; } else { complete_work(IBV_WC_SUCCESS, 0, ctx); + backend_dev->rdma_dev_res->stats.mad_tx++; } } return; @@ -458,20 +478,21 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); - goto out_free_bctx; + goto err_free_bctx; } - rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge); + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge, + &backend_dev->rdma_dev_res->stats.tx_len); if (rc) { complete_work(IBV_WC_GENERAL_ERR, rc, ctx); - goto out_dealloc_cqe_ctx; + goto err_dealloc_cqe_ctx; } if (qp_type == IBV_QPT_UD) { 
wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, sgid_idx, dgid); if (!wr.wr.ud.ah) { complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); - goto out_dealloc_cqe_ctx; + goto err_dealloc_cqe_ctx; } wr.wr.ud.remote_qpn = dqpn; wr.wr.ud.remote_qkey = dqkey; @@ -488,15 +509,19 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, rdma_error_report("ibv_post_send fail, qpn=0x%x, rc=%d, errno=%d", qp->ibqp->qp_num, rc, errno); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); - goto out_dealloc_cqe_ctx; + goto err_dealloc_cqe_ctx; } + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); + backend_dev->rdma_dev_res->stats.tx++; + return; -out_dealloc_cqe_ctx: +err_dealloc_cqe_ctx: + backend_dev->rdma_dev_res->stats.tx_err++; rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); -out_free_bctx: +err_free_bctx: g_free(bctx); } @@ -554,6 +579,9 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, rc = save_mad_recv_buffer(backend_dev, sge, num_sge, ctx); if (rc) { complete_work(IBV_WC_GENERAL_ERR, rc, ctx); + rdma_dev_res->stats.mad_rx_bufs_err++; + } else { + rdma_dev_res->stats.mad_rx_bufs++; } } return; @@ -565,13 +593,14 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); - goto out_free_bctx; + goto err_free_bctx; } - rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge); + rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge, + &backend_dev->rdma_dev_res->stats.rx_bufs_len); if (rc) { complete_work(IBV_WC_GENERAL_ERR, rc, ctx); - goto out_dealloc_cqe_ctx; + goto err_dealloc_cqe_ctx; } wr.num_sge = num_sge; @@ -582,15 +611,19 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, rdma_error_report("ibv_post_recv fail, qpn=0x%x, rc=%d, errno=%d", qp->ibqp->qp_num, rc, errno); complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx); - goto 
out_dealloc_cqe_ctx; + goto err_dealloc_cqe_ctx; } + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); + rdma_dev_res->stats.rx_bufs++; + return; -out_dealloc_cqe_ctx: +err_dealloc_cqe_ctx: + backend_dev->rdma_dev_res->stats.rx_bufs_err++; rdma_rm_dealloc_cqe_ctx(rdma_dev_res, bctx_id); -out_free_bctx: +err_free_bctx: g_free(bctx); } @@ -929,12 +962,14 @@ static void process_incoming_mad_req(RdmaBackendDev *backend_dev, bctx = rdma_rm_get_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id); if (unlikely(!bctx)) { rdma_error_report("No matching ctx for req %ld", cqe_ctx_id); + backend_dev->rdma_dev_res->stats.mad_rx_err++; return; } mad = rdma_pci_dma_map(backend_dev->dev, bctx->sge.addr, bctx->sge.length); if (!mad || bctx->sge.length < msg->umad_len + MAD_HDR_SIZE) { + backend_dev->rdma_dev_res->stats.mad_rx_err++; complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_MAD_BUFF, bctx->up_ctx); } else { @@ -949,6 +984,7 @@ static void process_incoming_mad_req(RdmaBackendDev *backend_dev, wc.byte_len = msg->umad_len; wc.status = IBV_WC_SUCCESS; wc.wc_flags = IBV_WC_GRH; + backend_dev->rdma_dev_res->stats.mad_rx++; comp_handler(bctx->up_ctx, &wc); } diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 7ea62a9e60..35fa1ab44e 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -37,6 +37,7 @@ static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl, tbl->bitmap = bitmap_new(tbl_sz); tbl->tbl_sz = tbl_sz; tbl->res_sz = res_sz; + tbl->used = 0; qemu_mutex_init(&tbl->lock); } @@ -76,6 +77,8 @@ static inline void *rdma_res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle) set_bit(*handle, tbl->bitmap); + tbl->used++; + qemu_mutex_unlock(&tbl->lock); memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz); @@ -93,6 +96,7 @@ static inline void rdma_res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle) if (handle < tbl->tbl_sz) { clear_bit(handle, tbl->bitmap); + tbl->used--; } qemu_mutex_unlock(&tbl->lock); @@ -619,6 +623,9 @@ int 
rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr) qemu_mutex_init(&dev_res->lock); + memset(&dev_res->stats, 0, sizeof(dev_res->stats)); + atomic_set(&dev_res->stats.missing_cqe, 0); + return 0; } diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index f0ee1f3072..4b8d704cfe 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -34,7 +34,9 @@ #define MAX_QP_INIT_RD_ATOM 16 #define MAX_AH 64 -#define MAX_RM_TBL_NAME 16 +#define MAX_RM_TBL_NAME 16 +#define MAX_CONSEQ_EMPTY_POLL_CQ 4096 /* considered as error above this */ + typedef struct RdmaRmResTbl { char name[MAX_RM_TBL_NAME]; QemuMutex lock; @@ -42,6 +44,7 @@ typedef struct RdmaRmResTbl { size_t tbl_sz; size_t res_sz; void *tbl; + uint32_t used; /* number of used entries in the table */ } RdmaRmResTbl; typedef struct RdmaRmPD { @@ -96,6 +99,27 @@ typedef struct RdmaRmPort { enum ibv_port_state state; } RdmaRmPort; +typedef struct RdmaRmStats { + uint64_t tx; + uint64_t tx_len; + uint64_t tx_err; + uint64_t rx_bufs; + uint64_t rx_bufs_len; + uint64_t rx_bufs_err; + uint64_t completions; + uint64_t mad_tx; + uint64_t mad_tx_err; + uint64_t mad_rx; + uint64_t mad_rx_err; + uint64_t mad_rx_bufs; + uint64_t mad_rx_bufs_err; + uint64_t poll_cq_from_bk; + uint64_t poll_cq_from_guest; + uint64_t poll_cq_from_guest_empty; + uint64_t poll_cq_ppoll_to; + uint32_t missing_cqe; +} RdmaRmStats; + typedef struct RdmaDeviceResources { RdmaRmPort port; RdmaRmResTbl pd_tbl; @@ -106,6 +130,7 @@ typedef struct RdmaDeviceResources { RdmaRmResTbl cqe_ctx_tbl; GHashTable *qp_hash; /* Keeps mapping between real and emulated */ QemuMutex lock; + RdmaRmStats stats; } RdmaDeviceResources; #endif diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h index 0879224957..a8a04a253c 100644 --- a/hw/rdma/vmw/pvrdma.h +++ b/hw/rdma/vmw/pvrdma.h @@ -70,6 +70,14 @@ typedef struct DSRInfo { PvrdmaRing cq; } DSRInfo; +typedef struct PVRDMADevStats { + uint64_t commands; + uint64_t regs_reads; + 
uint64_t regs_writes; + uint64_t uar_writes; + uint64_t interrupts; +} PVRDMADevStats; + typedef struct PVRDMADev { PCIDevice parent_obj; MemoryRegion msix; @@ -89,6 +97,7 @@ typedef struct PVRDMADev { CharBackend mad_chr; VMXNET3State *func0; Notifier shutdown_notifier; + PVRDMADevStats stats; } PVRDMADev; #define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME) @@ -123,6 +132,7 @@ static inline void post_interrupt(PVRDMADev *dev, unsigned vector) PCIDevice *pci_dev = PCI_DEVICE(dev); if (likely(!dev->interrupt_mask)) { + dev->stats.interrupts++; msix_notify(pci_dev, vector); } } diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 21a55e225a..6d56746357 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -651,6 +651,8 @@ int pvrdma_exec_cmd(PVRDMADev *dev) trace_pvrdma_exec_cmd(dsr_info->req->hdr.cmd, dsr_info->rsp->hdr.err); + dev->stats.commands++; + out: set_reg_val(dev, PVRDMA_REG_ERR, err); post_interrupt(dev, INTR_VEC_CMD_RING); diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 81ae08bd8d..dd35646324 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -337,6 +337,8 @@ static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size) PVRDMADev *dev = opaque; uint32_t val; + dev->stats.regs_reads++; + if (get_reg_val(dev, addr, &val)) { rdma_error_report("Failed to read REG value from address 0x%x", (uint32_t)addr); @@ -353,6 +355,8 @@ static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val, { PVRDMADev *dev = opaque; + dev->stats.regs_writes++; + if (set_reg_val(dev, addr, val)) { rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64, addr, val); @@ -421,6 +425,8 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, { PVRDMADev *dev = opaque; + dev->stats.uar_writes++; + switch (addr & 0xFFF) { /* Mask with 0xFFF as each UC gets page */ case PVRDMA_UAR_QP_OFFSET: if (val & PVRDMA_UAR_QP_SEND) { @@ 
-612,6 +618,8 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) goto out; } + memset(&dev->stats, 0, sizeof(dev->stats)); + dev->shutdown_notifier.notify = pvrdma_shutdown_notifier; qemu_register_shutdown_notifier(&dev->shutdown_notifier); From f4b2c02a2911b164474b998532b1a963fc9b785b Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:09 -0700 Subject: [PATCH 06/18] {hmp, hw/pvrdma}: Expose device internals via monitor interface Allow interrogating device internals through the HMP interface. The exposed indicators can be used for troubleshooting by developers or sysadmins. There is no need to expose these attributes to a management system (e.g. libvirt) because (1) most of them are not "device-management" related info and (2) there is no guarantee the interface is stable. Signed-off-by: Yuval Shaia Acked-by: Dr. David Alan Gilbert Acked-by: Markus Armbruster Message-Id: <1552300155-25216-6-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Marcel Apfelbaum Signed-off-by: Marcel Apfelbaum --- hmp-commands-info.hx | 14 +++++++++++ hmp.c | 27 ++++++++++++++++++++ hmp.h | 1 + hw/rdma/Makefile.objs | 2 +- hw/rdma/rdma.c | 30 ++++++++++++++++++++++ hw/rdma/rdma_rm.c | 53 +++++++++++++++++++++++++++++++++++++++ hw/rdma/rdma_rm.h | 1 + hw/rdma/vmw/pvrdma_main.c | 26 +++++++++++++++++++ include/hw/rdma/rdma.h | 40 +++++++++++++++++++++++++++++ 9 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 hw/rdma/rdma.c create mode 100644 include/hw/rdma/rdma.h diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index cbee8b944d..c59444c461 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -202,6 +202,20 @@ STEXI @item info pic @findex info pic Show PIC state. +ETEXI + + { + .name = "rdma", + .args_type = "", + .params = "", + .help = "show RDMA state", + .cmd = hmp_info_rdma, + }, + +STEXI +@item info rdma +@findex info rdma +Show RDMA state.
ETEXI { diff --git a/hmp.c b/hmp.c index 4a702d5b97..fa1e59a2fc 100644 --- a/hmp.c +++ b/hmp.c @@ -51,6 +51,7 @@ #include "qemu/error-report.h" #include "exec/ramlist.h" #include "hw/intc/intc.h" +#include "hw/rdma/rdma.h" #include "migration/snapshot.h" #include "migration/misc.h" @@ -1013,6 +1014,32 @@ void hmp_info_pic(Monitor *mon, const QDict *qdict) hmp_info_pic_foreach, mon); } +static int hmp_info_rdma_foreach(Object *obj, void *opaque) +{ + RdmaProvider *rdma; + RdmaProviderClass *k; + Monitor *mon = opaque; + + if (object_dynamic_cast(obj, INTERFACE_RDMA_PROVIDER)) { + rdma = RDMA_PROVIDER(obj); + k = RDMA_PROVIDER_GET_CLASS(obj); + if (k->print_statistics) { + k->print_statistics(mon, rdma); + } else { + monitor_printf(mon, "RDMA statistics not available for %s.\n", + object_get_typename(obj)); + } + } + + return 0; +} + +void hmp_info_rdma(Monitor *mon, const QDict *qdict) +{ + object_child_foreach_recursive(object_get_root(), + hmp_info_rdma_foreach, mon); +} + void hmp_info_pci(Monitor *mon, const QDict *qdict) { PciInfoList *info_list, *info; diff --git a/hmp.h b/hmp.h index e0f32f04d3..43617f2646 100644 --- a/hmp.h +++ b/hmp.h @@ -36,6 +36,7 @@ void hmp_info_spice(Monitor *mon, const QDict *qdict); void hmp_info_balloon(Monitor *mon, const QDict *qdict); void hmp_info_irq(Monitor *mon, const QDict *qdict); void hmp_info_pic(Monitor *mon, const QDict *qdict); +void hmp_info_rdma(Monitor *mon, const QDict *qdict); void hmp_info_pci(Monitor *mon, const QDict *qdict); void hmp_info_block_jobs(Monitor *mon, const QDict *qdict); void hmp_info_tpm(Monitor *mon, const QDict *qdict); diff --git a/hw/rdma/Makefile.objs b/hw/rdma/Makefile.objs index bd36cbf51c..c354e60e5b 100644 --- a/hw/rdma/Makefile.objs +++ b/hw/rdma/Makefile.objs @@ -1,5 +1,5 @@ ifeq ($(CONFIG_PVRDMA),y) -obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o +obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o rdma.o obj-$(CONFIG_PCI) += vmw/pvrdma_dev_ring.o vmw/pvrdma_cmd.o \ 
vmw/pvrdma_qp_ops.o vmw/pvrdma_main.o endif diff --git a/hw/rdma/rdma.c b/hw/rdma/rdma.c new file mode 100644 index 0000000000..7bec0d0d2c --- /dev/null +++ b/hw/rdma/rdma.c @@ -0,0 +1,30 @@ +/* + * RDMA device interface + * + * Copyright (C) 2018 Oracle + * Copyright (C) 2018 Red Hat Inc + * + * Authors: + * Yuval Shaia + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "hw/rdma/rdma.h" +#include "qemu/module.h" + +static const TypeInfo rdma_hmp_info = { + .name = INTERFACE_RDMA_PROVIDER, + .parent = TYPE_INTERFACE, + .class_size = sizeof(RdmaProviderClass), +}; + +static void rdma_register_types(void) +{ + type_register_static(&rdma_hmp_info); +} + +type_init(rdma_register_types) diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index 35fa1ab44e..b50e192b49 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -16,6 +16,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" +#include "monitor/monitor.h" #include "trace.h" #include "rdma_utils.h" @@ -26,6 +27,58 @@ #define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } #define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) } +void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res) +{ + monitor_printf(mon, "\ttx : %" PRId64 "\n", + dev_res->stats.tx); + monitor_printf(mon, "\ttx_len : %" PRId64 "\n", + dev_res->stats.tx_len); + monitor_printf(mon, "\ttx_err : %" PRId64 "\n", + dev_res->stats.tx_err); + monitor_printf(mon, "\trx_bufs : %" PRId64 "\n", + dev_res->stats.rx_bufs); + monitor_printf(mon, "\trx_bufs_len : %" PRId64 "\n", + dev_res->stats.rx_bufs_len); + monitor_printf(mon, "\trx_bufs_err : %" PRId64 "\n", + dev_res->stats.rx_bufs_err); + monitor_printf(mon, "\tcomps : %" PRId64 "\n", + dev_res->stats.completions); + monitor_printf(mon, "\tmissing_comps : %" PRId32 "\n", + dev_res->stats.missing_cqe); + monitor_printf(mon, "\tpoll_cq (bk) : 
%" PRId64 "\n", + dev_res->stats.poll_cq_from_bk); + monitor_printf(mon, "\tpoll_cq_ppoll_to : %" PRId64 "\n", + dev_res->stats.poll_cq_ppoll_to); + monitor_printf(mon, "\tpoll_cq (fe) : %" PRId64 "\n", + dev_res->stats.poll_cq_from_guest); + monitor_printf(mon, "\tpoll_cq_empty : %" PRId64 "\n", + dev_res->stats.poll_cq_from_guest_empty); + monitor_printf(mon, "\tmad_tx : %" PRId64 "\n", + dev_res->stats.mad_tx); + monitor_printf(mon, "\tmad_tx_err : %" PRId64 "\n", + dev_res->stats.mad_tx_err); + monitor_printf(mon, "\tmad_rx : %" PRId64 "\n", + dev_res->stats.mad_rx); + monitor_printf(mon, "\tmad_rx_err : %" PRId64 "\n", + dev_res->stats.mad_rx_err); + monitor_printf(mon, "\tmad_rx_bufs : %" PRId64 "\n", + dev_res->stats.mad_rx_bufs); + monitor_printf(mon, "\tmad_rx_bufs_err : %" PRId64 "\n", + dev_res->stats.mad_rx_bufs_err); + monitor_printf(mon, "\tPDs : %" PRId32 "\n", + dev_res->pd_tbl.used); + monitor_printf(mon, "\tMRs : %" PRId32 "\n", + dev_res->mr_tbl.used); + monitor_printf(mon, "\tUCs : %" PRId32 "\n", + dev_res->uc_tbl.used); + monitor_printf(mon, "\tQPs : %" PRId32 "\n", + dev_res->qp_tbl.used); + monitor_printf(mon, "\tCQs : %" PRId32 "\n", + dev_res->cq_tbl.used); + monitor_printf(mon, "\tCEQ_CTXs : %" PRId32 "\n", + dev_res->cqe_ctx_tbl.used); +} + static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl, uint32_t tbl_sz, uint32_t res_sz) { diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h index f9b2ec5076..4f03f9b8c5 100644 --- a/hw/rdma/rdma_rm.h +++ b/hw/rdma/rdma_rm.h @@ -81,5 +81,6 @@ static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res, { return &dev_res->port.gid_tbl[sgid_idx].gid; } +void rdma_dump_device_counters(Monitor *mon, RdmaDeviceResources *dev_res); #endif diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index dd35646324..729a2df5a0 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -25,6 +25,8 @@ #include "cpu.h" #include "trace.h" #include 
"sysemu/sysemu.h" +#include "monitor/monitor.h" +#include "hw/rdma/rdma.h" #include "../rdma_rm.h" #include "../rdma_backend.h" @@ -55,6 +57,26 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static void pvrdma_print_statistics(Monitor *mon, RdmaProvider *obj) +{ + PVRDMADev *dev = PVRDMA_DEV(obj); + PCIDevice *pdev = PCI_DEVICE(dev); + + monitor_printf(mon, "%s, %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + monitor_printf(mon, "\tcommands : %" PRId64 "\n", + dev->stats.commands); + monitor_printf(mon, "\tregs_reads : %" PRId64 "\n", + dev->stats.regs_reads); + monitor_printf(mon, "\tregs_writes : %" PRId64 "\n", + dev->stats.regs_writes); + monitor_printf(mon, "\tuar_writes : %" PRId64 "\n", + dev->stats.uar_writes); + monitor_printf(mon, "\tinterrupts : %" PRId64 "\n", + dev->stats.interrupts); + rdma_dump_device_counters(mon, &dev->rdma_dev_res); +} + static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring, void *ring_state) { @@ -639,6 +661,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + RdmaProviderClass *ir = INTERFACE_RDMA_PROVIDER_CLASS(klass); k->realize = pvrdma_realize; k->exit = pvrdma_exit; @@ -650,6 +673,8 @@ static void pvrdma_class_init(ObjectClass *klass, void *data) dc->desc = "RDMA Device"; dc->props = pvrdma_dev_properties; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); + + ir->print_statistics = pvrdma_print_statistics; } static const TypeInfo pvrdma_info = { @@ -659,6 +684,7 @@ static const TypeInfo pvrdma_info = { .class_init = pvrdma_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { INTERFACE_RDMA_PROVIDER }, { } } }; diff --git a/include/hw/rdma/rdma.h b/include/hw/rdma/rdma.h new file mode 100644 index 0000000000..68290fb58c --- /dev/null +++ b/include/hw/rdma/rdma.h @@ -0,0 +1,40 @@ +/* + * RDMA device interface + * + * Copyright 
(C) 2019 Oracle + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * Yuval Shaia + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef RDMA_H +#define RDMA_H + +#include "qom/object.h" + +#define INTERFACE_RDMA_PROVIDER "rdma" + +#define INTERFACE_RDMA_PROVIDER_CLASS(klass) \ + OBJECT_CLASS_CHECK(RdmaProviderClass, (klass), \ + INTERFACE_RDMA_PROVIDER) +#define RDMA_PROVIDER_GET_CLASS(obj) \ + OBJECT_GET_CLASS(RdmaProviderClass, (obj), \ + INTERFACE_RDMA_PROVIDER) +#define RDMA_PROVIDER(obj) \ + INTERFACE_CHECK(RdmaProvider, (obj), \ + INTERFACE_RDMA_PROVIDER) + +typedef struct RdmaProvider RdmaProvider; + +typedef struct RdmaProviderClass { + InterfaceClass parent; + + void (*print_statistics)(Monitor *mon, RdmaProvider *obj); +} RdmaProviderClass; + +#endif From ff30a446b1d13373d35417d77d80b11cfde3fc6c Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:10 -0700 Subject: [PATCH 07/18] hw/rdma: Free all MAD receive buffers when device is closed When device is going down free all saved MAD buffers. 
Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-7-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 34 +++++++++++++++++++++++++++++++++- hw/rdma/vmw/pvrdma_main.c | 2 ++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index e8af9741b1..d0bbe57bd2 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -64,6 +64,33 @@ static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err, comp_handler(ctx, &wc); } +static void free_cqe_ctx(gpointer data, gpointer user_data) +{ + BackendCtx *bctx; + RdmaDeviceResources *rdma_dev_res = user_data; + unsigned long cqe_ctx_id = GPOINTER_TO_INT(data); + + bctx = rdma_rm_get_cqe_ctx(rdma_dev_res, cqe_ctx_id); + if (bctx) { + rdma_rm_dealloc_cqe_ctx(rdma_dev_res, cqe_ctx_id); + } + g_free(bctx); +} + +static void clean_recv_mads(RdmaBackendDev *backend_dev) +{ + unsigned long cqe_ctx_id; + + do { + cqe_ctx_id = rdma_protected_qlist_pop_int64(&backend_dev-> + recv_mads_list); + if (cqe_ctx_id != -ENOENT) { + free_cqe_ctx(GINT_TO_POINTER(cqe_ctx_id), + backend_dev->rdma_dev_res); + } + } while (cqe_ctx_id != -ENOENT); +} + static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) { int i, ne, total_ne = 0; @@ -1037,6 +1064,11 @@ static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be) return 0; } +static void mad_stop(RdmaBackendDev *backend_dev) +{ + clean_recv_mads(backend_dev); +} + static void mad_fini(RdmaBackendDev *backend_dev) { disable_rdmacm_mux_async(backend_dev); @@ -1224,12 +1256,12 @@ void rdma_backend_start(RdmaBackendDev *backend_dev) void rdma_backend_stop(RdmaBackendDev *backend_dev) { + mad_stop(backend_dev); stop_backend_thread(&backend_dev->comp_thread); } void rdma_backend_fini(RdmaBackendDev *backend_dev) { - rdma_backend_stop(backend_dev); mad_fini(backend_dev); 
g_hash_table_destroy(ah_hash); ibv_destroy_comp_channel(backend_dev->channel); diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 729a2df5a0..04845f46f7 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -313,6 +313,8 @@ static void pvrdma_fini(PCIDevice *pdev) pvrdma_qp_ops_fini(); + rdma_backend_stop(&dev->backend_dev); + rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, dev->backend_eth_device_name); From bf4414515b468c0a4ca69f1450bfe65418022955 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:11 -0700 Subject: [PATCH 08/18] hw/rdma: Free all receive buffers when QP is destroyed When QP is destroyed the backend QP is destroyed as well. This ensures we clean all receive buffers we posted to it. However, the contexts of these buffers still remain in the device. Fix it by maintaining a list of the buffers' contexts and freeing them when the QP is destroyed. Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-8-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 26 ++++++++++++++++++++------ hw/rdma/rdma_backend.h | 2 +- hw/rdma/rdma_backend_defs.h | 2 +- hw/rdma/rdma_rm.c | 2 +- hw/rdma/rdma_utils.c | 29 +++++++++++++++++++++++++++++ hw/rdma/rdma_utils.h | 11 +++++++++++ 6 files changed, 63 insertions(+), 9 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index d0bbe57bd2..e124d8d16b 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -39,6 +39,7 @@ typedef struct BackendCtx { void *up_ctx; struct ibv_sge sge; /* Used to save MAD recv buffer */ + RdmaBackendQP *backend_qp; /* To maintain recv buffers */ } BackendCtx; struct backend_umad { @@ -73,6 +74,7 @@ static void free_cqe_ctx(gpointer data, gpointer user_data) bctx = rdma_rm_get_cqe_ctx(rdma_dev_res, cqe_ctx_id); if (bctx) { rdma_rm_dealloc_cqe_ctx(rdma_dev_res, cqe_ctx_id); +
atomic_dec(&rdma_dev_res->stats.missing_cqe); } g_free(bctx); } @@ -85,13 +87,15 @@ static void clean_recv_mads(RdmaBackendDev *backend_dev) cqe_ctx_id = rdma_protected_qlist_pop_int64(&backend_dev-> recv_mads_list); if (cqe_ctx_id != -ENOENT) { + atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); free_cqe_ctx(GINT_TO_POINTER(cqe_ctx_id), backend_dev->rdma_dev_res); } } while (cqe_ctx_id != -ENOENT); } -static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) +static int rdma_poll_cq(RdmaBackendDev *backend_dev, + RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) { int i, ne, total_ne = 0; BackendCtx *bctx; @@ -113,6 +117,8 @@ static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) comp_handler(bctx->up_ctx, &wc[i]); + rdma_protected_gslist_remove_int32(&bctx->backend_qp->cqe_ctx_list, + wc[i].wr_id); rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id); g_free(bctx); } @@ -175,14 +181,12 @@ static void *comp_handler_thread(void *arg) } backend_dev->rdma_dev_res->stats.poll_cq_from_bk++; - rdma_poll_cq(backend_dev->rdma_dev_res, ev_cq); + rdma_poll_cq(backend_dev, backend_dev->rdma_dev_res, ev_cq); ibv_ack_cq_events(ev_cq, 1); } } - /* TODO: Post cqe for all remaining buffs that were posted */ - backend_dev->comp_thread.is_running = false; qemu_thread_exit(0); @@ -311,7 +315,7 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq) int polled; rdma_dev_res->stats.poll_cq_from_guest++; - polled = rdma_poll_cq(rdma_dev_res, cq->ibcq); + polled = rdma_poll_cq(cq->backend_dev, rdma_dev_res, cq->ibcq); if (!polled) { rdma_dev_res->stats.poll_cq_from_guest_empty++; } @@ -501,6 +505,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, bctx = g_malloc0(sizeof(*bctx)); bctx->up_ctx = ctx; + bctx->backend_qp = qp; rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { @@ -508,6 +513,8 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, 
goto err_free_bctx; } + rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id); + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge, &backend_dev->rdma_dev_res->stats.tx_len); if (rc) { @@ -616,6 +623,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, bctx = g_malloc0(sizeof(*bctx)); bctx->up_ctx = ctx; + bctx->backend_qp = qp; rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { @@ -623,6 +631,8 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, goto err_free_bctx; } + rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id); + rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge, &backend_dev->rdma_dev_res->stats.rx_bufs_len); if (rc) { @@ -762,6 +772,8 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, return -EIO; } + rdma_protected_gslist_init(&qp->cqe_ctx_list); + qp->ibpd = pd->ibpd; /* TODO: Query QP to get max_inline_data and save it to be used in send */ @@ -919,11 +931,13 @@ int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr, return ibv_query_qp(qp->ibqp, attr, attr_mask, init_attr); } -void rdma_backend_destroy_qp(RdmaBackendQP *qp) +void rdma_backend_destroy_qp(RdmaBackendQP *qp, RdmaDeviceResources *dev_res) { if (qp->ibqp) { ibv_destroy_qp(qp->ibqp); } + g_slist_foreach(qp->cqe_ctx_list.list, free_cqe_ctx, dev_res); + rdma_protected_gslist_destroy(&qp->cqe_ctx_list); } #define CHK_ATTR(req, dev, member, fmt) ({ \ diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index 8e53a72bf2..c54eaf2e4a 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -102,7 +102,7 @@ int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type, uint32_t sq_psn, uint32_t qkey, bool use_qkey); int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr); -void rdma_backend_destroy_qp(RdmaBackendQP *qp); +void rdma_backend_destroy_qp(RdmaBackendQP *qp, 
RdmaDeviceResources *dev_res); void rdma_backend_post_send(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h index a8c15b09ab..817153dc8c 100644 --- a/hw/rdma/rdma_backend_defs.h +++ b/hw/rdma/rdma_backend_defs.h @@ -26,7 +26,6 @@ typedef struct RdmaDeviceResources RdmaDeviceResources; typedef struct RdmaBackendThread { QemuThread thread; - QemuMutex mutex; bool run; /* Set by thread manager to let thread know it should exit */ bool is_running; /* Set by the thread to report its status */ } RdmaBackendThread; @@ -66,6 +65,7 @@ typedef struct RdmaBackendQP { struct ibv_pd *ibpd; struct ibv_qp *ibqp; uint8_t sgid_idx; + RdmaProtectedGSList cqe_ctx_list; } RdmaBackendQP; #endif diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c index b50e192b49..bac3b2f4a6 100644 --- a/hw/rdma/rdma_rm.c +++ b/hw/rdma/rdma_rm.c @@ -537,7 +537,7 @@ void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle) return; } - rdma_backend_destroy_qp(&qp->backend_qp); + rdma_backend_destroy_qp(&qp->backend_qp, dev_res); rdma_res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); } diff --git a/hw/rdma/rdma_utils.c b/hw/rdma/rdma_utils.c index 0a8abe572d..73f279104c 100644 --- a/hw/rdma/rdma_utils.c +++ b/hw/rdma/rdma_utils.c @@ -90,3 +90,32 @@ int64_t rdma_protected_qlist_pop_int64(RdmaProtectedQList *list) return qnum_get_uint(qobject_to(QNum, obj)); } + +void rdma_protected_gslist_init(RdmaProtectedGSList *list) +{ + qemu_mutex_init(&list->lock); +} + +void rdma_protected_gslist_destroy(RdmaProtectedGSList *list) +{ + if (list->list) { + g_slist_free(list->list); + list->list = NULL; + } +} + +void rdma_protected_gslist_append_int32(RdmaProtectedGSList *list, + int32_t value) +{ + qemu_mutex_lock(&list->lock); + list->list = g_slist_prepend(list->list, GINT_TO_POINTER(value)); + qemu_mutex_unlock(&list->lock); +} + +void rdma_protected_gslist_remove_int32(RdmaProtectedGSList *list, + int32_t value) +{ + 
qemu_mutex_lock(&list->lock); + list->list = g_slist_remove(list->list, GINT_TO_POINTER(value)); + qemu_mutex_unlock(&list->lock); +} diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h index a8bf1d4fec..2d42249691 100644 --- a/hw/rdma/rdma_utils.h +++ b/hw/rdma/rdma_utils.h @@ -34,12 +34,23 @@ typedef struct RdmaProtectedQList { QList *list; } RdmaProtectedQList; +typedef struct RdmaProtectedGSList { + QemuMutex lock; + GSList *list; +} RdmaProtectedGSList; + void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen); void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len); void rdma_protected_qlist_init(RdmaProtectedQList *list); void rdma_protected_qlist_destroy(RdmaProtectedQList *list); void rdma_protected_qlist_append_int64(RdmaProtectedQList *list, int64_t value); int64_t rdma_protected_qlist_pop_int64(RdmaProtectedQList *list); +void rdma_protected_gslist_init(RdmaProtectedGSList *list); +void rdma_protected_gslist_destroy(RdmaProtectedGSList *list); +void rdma_protected_gslist_append_int32(RdmaProtectedGSList *list, + int32_t value); +void rdma_protected_gslist_remove_int32(RdmaProtectedGSList *list, + int32_t value); static inline void addrconf_addr_eui48(uint8_t *eui, const char *addr) { From 3c890bcf3088689c4c5f883b5189646f5862a91f Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:12 -0700 Subject: [PATCH 09/18] hw/pvrdma: Delete unneeded function argument The function's argument rdma_dev_res is not needed as it is stored in the backend_dev object at init. 
Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-9-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 13 ++++++------- hw/rdma/rdma_backend.h | 1 - hw/rdma/vmw/pvrdma_qp_ops.c | 3 +-- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index e124d8d16b..89279e66e7 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -594,7 +594,6 @@ static unsigned int save_mad_recv_buffer(RdmaBackendDev *backend_dev, } void rdma_backend_post_recv(RdmaBackendDev *backend_dev, - RdmaDeviceResources *rdma_dev_res, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, void *ctx) { @@ -613,9 +612,9 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, rc = save_mad_recv_buffer(backend_dev, sge, num_sge, ctx); if (rc) { complete_work(IBV_WC_GENERAL_ERR, rc, ctx); - rdma_dev_res->stats.mad_rx_bufs_err++; + backend_dev->rdma_dev_res->stats.mad_rx_bufs_err++; } else { - rdma_dev_res->stats.mad_rx_bufs++; + backend_dev->rdma_dev_res->stats.mad_rx_bufs++; } } return; @@ -625,7 +624,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, bctx->up_ctx = ctx; bctx->backend_qp = qp; - rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx); + rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx); if (unlikely(rc)) { complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx); goto err_free_bctx; @@ -633,7 +632,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, rdma_protected_gslist_append_int32(&qp->cqe_ctx_list, bctx_id); - rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge, + rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge, &backend_dev->rdma_dev_res->stats.rx_bufs_len); if (rc) { complete_work(IBV_WC_GENERAL_ERR, rc, ctx); @@ -652,13 +651,13 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, } 
atomic_inc(&backend_dev->rdma_dev_res->stats.missing_cqe); - rdma_dev_res->stats.rx_bufs++; + backend_dev->rdma_dev_res->stats.rx_bufs++; return; err_dealloc_cqe_ctx: backend_dev->rdma_dev_res->stats.rx_bufs_err++; - rdma_rm_dealloc_cqe_ctx(rdma_dev_res, bctx_id); + rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id); err_free_bctx: g_free(bctx); diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h index c54eaf2e4a..38056d97c7 100644 --- a/hw/rdma/rdma_backend.h +++ b/hw/rdma/rdma_backend.h @@ -111,7 +111,6 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey, void *ctx); void rdma_backend_post_recv(RdmaBackendDev *backend_dev, - RdmaDeviceResources *rdma_dev_res, RdmaBackendQP *qp, uint8_t qp_type, struct ibv_sge *sge, uint32_t num_sge, void *ctx); diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 16db726dac..508d8fca3c 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -231,8 +231,7 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) continue; } - rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res, - &qp->backend_qp, qp->qp_type, + rdma_backend_post_recv(&dev->backend_dev, &qp->backend_qp, qp->qp_type, (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, comp_ctx); From a2f1dc6091e801514f955af255af89d440e762fc Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:13 -0700 Subject: [PATCH 10/18] hw/pvrdma: Delete pvrdma_exit function This hook is not called and was implemented by mistake. 
Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-10-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/vmw/pvrdma_main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 04845f46f7..a4afceda14 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -654,11 +654,6 @@ out: } } -static void pvrdma_exit(PCIDevice *pdev) -{ - pvrdma_fini(pdev); -} - static void pvrdma_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -666,7 +661,6 @@ static void pvrdma_class_init(ObjectClass *klass, void *data) RdmaProviderClass *ir = INTERFACE_RDMA_PROVIDER_CLASS(klass); k->realize = pvrdma_realize; - k->exit = pvrdma_exit; k->vendor_id = PCI_VENDOR_ID_VMWARE; k->device_id = PCI_DEVICE_ID_VMWARE_PVRDMA; k->revision = 0x00; From b556c3cefcedec0fc892239f017ef7ddaa515311 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:14 -0700 Subject: [PATCH 11/18] hw/pvrdma: Unregister from shutdown notifier when device goes down This hook was installed to close the device when VM is going down. After the device is closed there is no need to be informed on VM shutdown. 
Signed-off-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Message-Id: <1552300155-25216-11-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/vmw/pvrdma_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index a4afceda14..49bfbd6d41 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -311,6 +311,8 @@ static void pvrdma_fini(PCIDevice *pdev) { PVRDMADev *dev = PVRDMA_DEV(pdev); + notifier_remove(&dev->shutdown_notifier); + pvrdma_qp_ops_fini(); rdma_backend_stop(&dev->backend_dev); From db8b88bf2cb5fc6bae421eb0be488e0047008438 Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Mon, 11 Mar 2019 03:29:15 -0700 Subject: [PATCH 12/18] hw/pvrdma: Provide correct value to object_get_typename Use base object of PCIDevice in call to object_get_typename(). Signed-off-by: Yuval Shaia Message-Id: <1552300155-25216-12-git-send-email-yuval.shaia@oracle.com> Reviewed-by: Kamal Heib Reviewed-by: Marcel Apfelbaum Signed-off-by: Marcel Apfelbaum --- hw/rdma/vmw/pvrdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 49bfbd6d41..0b46561bad 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -593,7 +593,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp) func0 = pci_get_function_0(pdev); /* Break if not vmxnet3 device in slot 0 */ - if (strcmp(object_get_typename(&func0->qdev.parent_obj), TYPE_VMXNET3)) { + if (strcmp(object_get_typename(OBJECT(func0)), TYPE_VMXNET3)) { error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn), TYPE_VMXNET3); return; From 59f911938fbaa6a5eff1146c8a4d74e1c55ecc2b Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 14 Feb 2019 17:40:53 +0200 Subject: [PATCH 13/18] hw/rdma: another clang compilation fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Configuring QEMU with: configure --target-list="x86_64-softmmu" --cc=clang --enable-pvrdma Results in: qemu/hw/rdma/rdma_rm_defs.h:108:3: error: redefinition of typedef 'RdmaDeviceResources' is a C11 feature [-Werror,-Wtypedef-redefinition] } RdmaDeviceResources; ^ qemu/hw/rdma/rdma_backend_defs.h:24:36: note: previous definition is here typedef struct RdmaDeviceResources RdmaDeviceResources; Fix by removing one of the 'typedef' definitions. Signed-off-by: Marcel Apfelbaum Message-Id: <20190214154053.15050-1-marcel.apfelbaum@gmail.com> Reviewed-by: Philippe Mathieu-Daudé Acked-by: Kamal Heib Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_rm_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h index 4b8d704cfe..c200d311de 100644 --- a/hw/rdma/rdma_rm_defs.h +++ b/hw/rdma/rdma_rm_defs.h @@ -120,7 +120,7 @@ typedef struct RdmaRmStats { uint32_t missing_cqe; } RdmaRmStats; -typedef struct RdmaDeviceResources { +struct RdmaDeviceResources { RdmaRmPort port; RdmaRmResTbl pd_tbl; RdmaRmResTbl mr_tbl; @@ -131,6 +131,6 @@ typedef struct RdmaDeviceResources { GHashTable *qp_hash; /* Keeps mapping between real and emulated */ QemuMutex lock; RdmaRmStats stats; -} RdmaDeviceResources; +}; #endif From 7b6f6e8138a462e9b40b008d62c4e0852a5505b8 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 14 Mar 2019 17:30:28 +0200 Subject: [PATCH 14/18] hw/rdma: Fix broken paths to docs/devel/tracing.txt The tracing.txt file is under "docs/devel" and not "docs". 
Reviewed-by: Yuval Shaia Signed-off-by: Kamal Heib Message-Id: <20190314153031.7197-2-kamalheib1@gmail.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/trace-events | 2 +- hw/rdma/vmw/trace-events | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/rdma/trace-events b/hw/rdma/trace-events index 0fad56c882..12868d8a87 100644 --- a/hw/rdma/trace-events +++ b/hw/rdma/trace-events @@ -1,4 +1,4 @@ -# See docs/tracing.txt for syntax documentation. +# See docs/devel/tracing.txt for syntax documentation. # hw/rdma/rdma_backend.c rdma_check_dev_attr(const char *name, int max_bk, int max_fe) "%s: be=%d, fe=%d" diff --git a/hw/rdma/vmw/trace-events b/hw/rdma/vmw/trace-events index 0122266ad7..e846d54359 100644 --- a/hw/rdma/vmw/trace-events +++ b/hw/rdma/vmw/trace-events @@ -1,4 +1,4 @@ -# See docs/tracing.txt for syntax documentation. +# See docs/devel/tracing.txt for syntax documentation. # hw/rdma/vmw/pvrdma_main.c pvrdma_regs_read(uint64_t addr, uint64_t val) "pvrdma.regs[0x%"PRIx64"]=0x%"PRIx64 From 1373f4a8728372a2b50ae8e0e53ae79182c1da29 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 14 Mar 2019 17:30:29 +0200 Subject: [PATCH 15/18] hw/rdma: Remove unused parameter from rdma_poll_cq() The 'rdma_dev_res' parameter is not used in rdma_poll_cq(), so remove it. 
Reviewed-by: Yuval Shaia Reviewed-by: Marcel Apfelbaum Signed-off-by: Kamal Heib Message-Id: <20190314153031.7197-3-kamalheib1@gmail.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index 89279e66e7..90983d2846 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -94,8 +94,7 @@ static void clean_recv_mads(RdmaBackendDev *backend_dev) } while (cqe_ctx_id != -ENOENT); } -static int rdma_poll_cq(RdmaBackendDev *backend_dev, - RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) +static int rdma_poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq) { int i, ne, total_ne = 0; BackendCtx *bctx; @@ -181,7 +180,7 @@ static void *comp_handler_thread(void *arg) } backend_dev->rdma_dev_res->stats.poll_cq_from_bk++; - rdma_poll_cq(backend_dev, backend_dev->rdma_dev_res, ev_cq); + rdma_poll_cq(backend_dev->rdma_dev_res, ev_cq); ibv_ack_cq_events(ev_cq, 1); } @@ -315,7 +314,7 @@ void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq) int polled; rdma_dev_res->stats.poll_cq_from_guest++; - polled = rdma_poll_cq(cq->backend_dev, rdma_dev_res, cq->ibcq); + polled = rdma_poll_cq(rdma_dev_res, cq->ibcq); if (!polled) { rdma_dev_res->stats.poll_cq_from_guest_empty++; } From a421c81148925c31c0515aa9d4c543b34e632cea Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 14 Mar 2019 17:30:30 +0200 Subject: [PATCH 16/18] hw/rdma: Use {} instead of {0} Initialize structs with {} instead of {0} to make sure that all code is using the same convention. 
Reviewed-by: Marcel Apfelbaum Signed-off-by: Kamal Heib Reviewed-by: Yuval Shaia Message-Id: <20190314153031.7197-4-kamalheib1@gmail.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/rdma_backend.c | 18 +++++++++--------- hw/rdma/vmw/pvrdma_cmd.c | 2 +- hw/rdma/vmw/pvrdma_qp_ops.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c index 90983d2846..d1660b6474 100644 --- a/hw/rdma/rdma_backend.c +++ b/hw/rdma/rdma_backend.c @@ -57,7 +57,7 @@ static void dummy_comp_handler(void *ctx, struct ibv_wc *wc) static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err, void *ctx) { - struct ibv_wc wc = {0}; + struct ibv_wc wc = {}; wc.status = status; wc.vendor_err = vendor_err; @@ -273,7 +273,7 @@ static void stop_backend_thread(RdmaBackendThread *thread) static void start_comp_thread(RdmaBackendDev *backend_dev) { - char thread_name[THR_NAME_LEN] = {0}; + char thread_name[THR_NAME_LEN] = {}; stop_backend_thread(&backend_dev->comp_thread); @@ -483,7 +483,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev, struct ibv_sge new_sge[MAX_SGE]; uint32_t bctx_id; int rc; - struct ibv_send_wr wr = {0}, *bad_wr; + struct ibv_send_wr wr = {}, *bad_wr; if (!qp->ibqp) { /* This field is not initialized for QP0 and QP1 */ if (qp_type == IBV_QPT_SMI) { @@ -600,7 +600,7 @@ void rdma_backend_post_recv(RdmaBackendDev *backend_dev, struct ibv_sge new_sge[MAX_SGE]; uint32_t bctx_id; int rc; - struct ibv_recv_wr wr = {0}, *bad_wr; + struct ibv_recv_wr wr = {}, *bad_wr; if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */ if (qp_type == IBV_QPT_SMI) { @@ -737,7 +737,7 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, uint32_t max_recv_wr, uint32_t max_send_sge, uint32_t max_recv_sge) { - struct ibv_qp_init_attr attr = {0}; + struct ibv_qp_init_attr attr = {}; qp->ibqp = 0; @@ -782,7 +782,7 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type, int 
rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, uint8_t qp_type, uint32_t qkey) { - struct ibv_qp_attr attr = {0}; + struct ibv_qp_attr attr = {}; int rc, attr_mask; attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT; @@ -821,7 +821,7 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, union ibv_gid *dgid, uint32_t dqpn, uint32_t rq_psn, uint32_t qkey, bool use_qkey) { - struct ibv_qp_attr attr = {0}; + struct ibv_qp_attr attr = {}; union ibv_gid ibv_gid = { .global.interface_id = dgid->global.interface_id, .global.subnet_prefix = dgid->global.subnet_prefix @@ -880,7 +880,7 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp, int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type, uint32_t sq_psn, uint32_t qkey, bool use_qkey) { - struct ibv_qp_attr attr = {0}; + struct ibv_qp_attr attr = {}; int rc, attr_mask; attr.qp_state = IBV_QPS_RTS; @@ -1012,7 +1012,7 @@ static void process_incoming_mad_req(RdmaBackendDev *backend_dev, complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_MAD_BUFF, bctx->up_ctx); } else { - struct ibv_wc wc = {0}; + struct ibv_wc wc = {}; memset(mad, 0, bctx->sge.length); build_mad_hdr((struct ibv_grh *)mad, (union ibv_gid *)&msg->umad.hdr.addr.gid, &msg->hdr.sgid, diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 6d56746357..b682e919d5 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -123,7 +123,7 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req, { struct pvrdma_cmd_query_port *cmd = &req->query_port; struct pvrdma_cmd_query_port_resp *resp = &rsp->query_port_resp; - struct pvrdma_port_attr attrs = {0}; + struct pvrdma_port_attr attrs = {}; if (cmd->port_num > MAX_PORTS) { return -EINVAL; diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 508d8fca3c..5b9786efbe 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -114,7 +114,7 @@ 
static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc) static void complete_with_error(uint32_t vendor_err, void *ctx) { - struct ibv_wc wc = {0}; + struct ibv_wc wc = {}; wc.status = IBV_WC_GENERAL_ERR; wc.vendor_err = vendor_err; From d151f5debdf1fe33014205e271e2cedd8fc824a2 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 14 Mar 2019 17:30:31 +0200 Subject: [PATCH 17/18] hw/pvrdma: Fix zero-initialization of resp in {query/modify}_qp Make sure to zero-initialize only the pvrdma_cmd_query_qp_resp and not the whole pvrdma_cmd_resp for query_qp. In modify_qp the resp isn't used, so remove its zero-initialization. Reviewed-by: Yuval Shaia Signed-off-by: Kamal Heib Message-Id: <20190314153031.7197-5-kamalheib1@gmail.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/vmw/pvrdma_cmd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index b682e919d5..be8c2b61c9 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -488,8 +488,6 @@ static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, struct pvrdma_cmd_modify_qp *cmd = &req->modify_qp; int rc; - memset(rsp, 0, sizeof(*rsp)); - /* No need to verify sgid_index since it is u8 */ rc = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev, @@ -512,7 +510,7 @@ static int query_qp(PVRDMADev *dev, union pvrdma_cmd_req *req, struct ibv_qp_init_attr init_attr; int rc; - memset(rsp, 0, sizeof(*rsp)); + memset(resp, 0, sizeof(*resp)); rc = rdma_rm_query_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle, (struct ibv_qp_attr *)&resp->attrs, cmd->attr_mask, From cb42a5867e7677a9fa1885a8436d3e7e8cbeeee9 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Wed, 27 Feb 2019 10:55:46 +0200 Subject: [PATCH 18/18] hw/rdma: Fix the error prints in create_qp_rings() The prints should indicate that we are talking about QP and not CQ.
Fixes: 98d176f8e592 ("hw/rdma: PVRDMA commands and data-path ops") Reviewed-by: Yuval Shaia Signed-off-by: Kamal Heib Message-Id: <20190227085546.23690-1-kamalheib1@gmail.com> Signed-off-by: Marcel Apfelbaum --- hw/rdma/vmw/pvrdma_cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index be8c2b61c9..4afcd2037d 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -374,13 +374,13 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); if (!dir) { - rdma_error_report("Failed to map to CQ page directory"); + rdma_error_report("Failed to map to QP page directory"); goto out; } tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); if (!tbl) { - rdma_error_report("Failed to map to CQ page table"); + rdma_error_report("Failed to map to QP page table"); goto out; } @@ -393,7 +393,7 @@ static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma, sr->ring_state = (struct pvrdma_ring *) rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); if (!sr->ring_state) { - rdma_error_report("Failed to map to CQ ring state"); + rdma_error_report("Failed to map to QP ring state"); goto out_free_sr_mem; }