RDMA/cm: Replace pr_debug() call sites with tracepoints

In the interest of converging on a common instrumentation infrastructure,
modernize the pr_debug() call sites added by commit 119bf81793 ("IB/cm:
Add debug prints to ib_cm"). The new tracepoints appear in a new "ib_cma"
subsystem.

The conversion is somewhat mechanical. Someone more familiar with the
semantics of the recorded information might suggest additional data
capture.

Some benefits include:

- Tracepoints enable "always on" reporting of these errors
- The error records are structured and compact
- Tracepoints provide hooks for eBPF scripts

Sample output:

            nfsd-1954  [003]    62.017901: icm_dreq_skipped:     local_id=1998890974 remote_id=1129750393 state=DREQ_RCVD lap_state=LAP_UNINIT

Link: https://lore.kernel.org/r/159767239665.2968.10613294222688696646.stgit@klimt.1015granger.net
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Chuck Lever 2020-08-17 09:53:16 -04:00 committed by Jason Gunthorpe
parent b3d03daa7c
commit 75874b3d50
4 changed files with 351 additions and 55 deletions

View File

@ -17,7 +17,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_cm-y := cm.o
ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o

View File

@ -27,6 +27,7 @@
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
#include "core_priv.h"
#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@ -2123,8 +2124,7 @@ static int cm_req_handler(struct cm_work *work)
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
be32_to_cpu(cm_id_priv->id.local_id));
trace_icm_no_listener_err(&cm_id_priv->id);
cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
goto destroy;
@ -2273,8 +2273,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
@ -2347,8 +2346,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
be32_to_cpu(cm_id->local_id), cm_id->state);
trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
@ -2464,7 +2462,7 @@ static int cm_rep_handler(struct cm_work *work)
cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
trace_icm_remote_no_priv_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@ -2478,11 +2476,10 @@ static int cm_rep_handler(struct cm_work *work)
break;
default:
ret = -EINVAL;
pr_debug(
"%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
__func__, cm_id_priv->id.state,
trace_icm_rep_unknown_err(
IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
cm_id_priv->id.state);
spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
@ -2499,7 +2496,7 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
pr_debug("%s: Failed to insert remote id %d\n", __func__,
trace_icm_insert_failed_err(
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
@ -2516,9 +2513,8 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
pr_debug(
"%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
__func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
trace_icm_staleconn_err(
IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
@ -2645,9 +2641,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_dreq_skipped(&cm_id_priv->id);
return -EINVAL;
}
@ -2721,10 +2715,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
return -EINVAL;
if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
pr_debug(
"%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_send_drep_err(&cm_id_priv->id);
kfree(private_data);
return -EINVAL;
}
@ -2809,9 +2800,8 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
pr_debug(
"%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
__func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
trace_icm_no_priv_err(
IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
@ -2857,9 +2847,7 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@ -2944,9 +2932,7 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
state);
break;
default:
pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_send_unknown_rej_err(&cm_id_priv->id);
return -EINVAL;
}
@ -3059,9 +3045,7 @@ static int cm_rej_handler(struct cm_work *work)
}
/* fall through */
default:
pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
goto out;
}
@ -3117,9 +3101,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
}
/* fall through */
default:
pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_send_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
goto error1;
}
@ -3228,9 +3210,7 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
/* fall through */
default:
pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
@ -3766,8 +3746,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
state, ib_wc_status_msg(wc_status));
trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@ -3890,7 +3869,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
@ -3926,8 +3905,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
be32_to_cpu(cm_id->local_id), cm_id->state);
trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
@ -4124,9 +4102,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@ -4174,9 +4150,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@ -4236,9 +4210,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
__func__, be32_to_cpu(cm_id_priv->id.local_id),
cm_id_priv->id.state);
trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}

View File

@ -0,0 +1,15 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Trace points for the IB Connection Manager.
*
* Author: Chuck Lever <chuck.lever@oracle.com>
*
* Copyright (c) 2020, Oracle and/or its affiliates.
*/
#include <rdma/rdma_cm.h>
#include "cma_priv.h"
#define CREATE_TRACE_POINTS
#include "cm_trace.h"

View File

@ -0,0 +1,309 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Trace point definitions for the RDMA Connect Manager.
*
* Author: Chuck Lever <chuck.lever@oracle.com>
*
* Copyright (c) 2020 Oracle and/or its affiliates.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM ib_cma
#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_IB_CMA_H
#include <linux/tracepoint.h>
#include <rdma/ib_cm.h>
#include <trace/events/rdma.h>
/*
* enum ib_cm_state, from include/rdma/ib_cm.h
*/
#define IB_CM_STATE_LIST \
ib_cm_state(IDLE) \
ib_cm_state(LISTEN) \
ib_cm_state(REQ_SENT) \
ib_cm_state(REQ_RCVD) \
ib_cm_state(MRA_REQ_SENT) \
ib_cm_state(MRA_REQ_RCVD) \
ib_cm_state(REP_SENT) \
ib_cm_state(REP_RCVD) \
ib_cm_state(MRA_REP_SENT) \
ib_cm_state(MRA_REP_RCVD) \
ib_cm_state(ESTABLISHED) \
ib_cm_state(DREQ_SENT) \
ib_cm_state(DREQ_RCVD) \
ib_cm_state(TIMEWAIT) \
ib_cm_state(SIDR_REQ_SENT) \
ib_cm_state_end(SIDR_REQ_RCVD)
#undef ib_cm_state
#undef ib_cm_state_end
#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
IB_CM_STATE_LIST
#undef ib_cm_state
#undef ib_cm_state_end
#define ib_cm_state(x) { IB_CM_##x, #x },
#define ib_cm_state_end(x) { IB_CM_##x, #x }
#define show_ib_cm_state(x) \
__print_symbolic(x, IB_CM_STATE_LIST)
/*
* enum ib_cm_lap_state, from include/rdma/ib_cm.h
*/
#define IB_CM_LAP_STATE_LIST \
ib_cm_lap_state(LAP_UNINIT) \
ib_cm_lap_state(LAP_IDLE) \
ib_cm_lap_state(LAP_SENT) \
ib_cm_lap_state(LAP_RCVD) \
ib_cm_lap_state(MRA_LAP_SENT) \
ib_cm_lap_state_end(MRA_LAP_RCVD)
#undef ib_cm_lap_state
#undef ib_cm_lap_state_end
#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
IB_CM_LAP_STATE_LIST
#undef ib_cm_lap_state
#undef ib_cm_lap_state_end
#define ib_cm_lap_state(x) { IB_CM_##x, #x },
#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
#define show_ib_cm_lap_state(x) \
__print_symbolic(x, IB_CM_LAP_STATE_LIST)
DECLARE_EVENT_CLASS(icm_id_class,
TP_PROTO(
const struct ib_cm_id *cm_id
),
TP_ARGS(cm_id),
TP_STRUCT__entry(
__field(const void *, cm_id) /* for eBPF scripts */
__field(unsigned int, local_id)
__field(unsigned int, remote_id)
__field(unsigned long, state)
__field(unsigned long, lap_state)
),
TP_fast_assign(
__entry->cm_id = cm_id;
__entry->local_id = be32_to_cpu(cm_id->local_id);
__entry->remote_id = be32_to_cpu(cm_id->remote_id);
__entry->state = cm_id->state;
__entry->lap_state = cm_id->lap_state;
),
TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
__entry->local_id, __entry->remote_id,
show_ib_cm_state(__entry->state),
show_ib_cm_lap_state(__entry->lap_state)
)
);
#define DEFINE_CM_ERR_EVENT(name) \
DEFINE_EVENT(icm_id_class, \
icm_##name##_err, \
TP_PROTO( \
const struct ib_cm_id *cm_id \
), \
TP_ARGS(cm_id))
DEFINE_CM_ERR_EVENT(send_cm_rtu);
DEFINE_CM_ERR_EVENT(establish);
DEFINE_CM_ERR_EVENT(no_listener);
DEFINE_CM_ERR_EVENT(send_drep);
DEFINE_CM_ERR_EVENT(dreq_unknown);
DEFINE_CM_ERR_EVENT(send_unknown_rej);
DEFINE_CM_ERR_EVENT(rej_unknown);
DEFINE_CM_ERR_EVENT(send_mra_unknown);
DEFINE_CM_ERR_EVENT(mra_unknown);
DEFINE_CM_ERR_EVENT(qp_init);
DEFINE_CM_ERR_EVENT(qp_rtr);
DEFINE_CM_ERR_EVENT(qp_rts);
DEFINE_EVENT(icm_id_class, \
icm_dreq_skipped, \
TP_PROTO( \
const struct ib_cm_id *cm_id \
), \
TP_ARGS(cm_id) \
);
DECLARE_EVENT_CLASS(icm_local_class,
TP_PROTO(
unsigned int local_id,
unsigned int remote_id
),
TP_ARGS(local_id, remote_id),
TP_STRUCT__entry(
__field(unsigned int, local_id)
__field(unsigned int, remote_id)
),
TP_fast_assign(
__entry->local_id = local_id;
__entry->remote_id = remote_id;
),
TP_printk("local_id=%u remote_id=%u",
__entry->local_id, __entry->remote_id
)
);
#define DEFINE_CM_LOCAL_EVENT(name) \
DEFINE_EVENT(icm_local_class, \
icm_##name, \
TP_PROTO( \
unsigned int local_id, \
unsigned int remote_id \
), \
TP_ARGS(local_id, remote_id))
DEFINE_CM_LOCAL_EVENT(staleconn_err);
DEFINE_CM_LOCAL_EVENT(no_priv_err);
DECLARE_EVENT_CLASS(icm_remote_class,
TP_PROTO(
u32 remote_id
),
TP_ARGS(remote_id),
TP_STRUCT__entry(
__field(u32, remote_id)
),
TP_fast_assign(
__entry->remote_id = remote_id;
),
TP_printk("remote_id=%u",
__entry->remote_id
)
);
#define DEFINE_CM_REMOTE_EVENT(name) \
DEFINE_EVENT(icm_remote_class, \
icm_##name, \
TP_PROTO( \
u32 remote_id \
), \
TP_ARGS(remote_id))
DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
DEFINE_CM_REMOTE_EVENT(insert_failed_err);
TRACE_EVENT(icm_send_rep_err,
TP_PROTO(
__be32 local_id,
enum ib_cm_state state
),
TP_ARGS(local_id, state),
TP_STRUCT__entry(
__field(unsigned int, local_id)
__field(unsigned long, state)
),
TP_fast_assign(
__entry->local_id = be32_to_cpu(local_id);
__entry->state = state;
),
TP_printk("local_id=%u state=%s",
__entry->local_id, show_ib_cm_state(__entry->state)
)
);
TRACE_EVENT(icm_rep_unknown_err,
TP_PROTO(
unsigned int local_id,
unsigned int remote_id,
enum ib_cm_state state
),
TP_ARGS(local_id, remote_id, state),
TP_STRUCT__entry(
__field(unsigned int, local_id)
__field(unsigned int, remote_id)
__field(unsigned long, state)
),
TP_fast_assign(
__entry->local_id = local_id;
__entry->remote_id = remote_id;
__entry->state = state;
),
TP_printk("local_id=%u remote_id=%u state=%s",
__entry->local_id, __entry->remote_id,
show_ib_cm_state(__entry->state)
)
);
TRACE_EVENT(icm_handler_err,
TP_PROTO(
enum ib_cm_event_type event
),
TP_ARGS(event),
TP_STRUCT__entry(
__field(unsigned long, event)
),
TP_fast_assign(
__entry->event = event;
),
TP_printk("unhandled event=%s",
rdma_show_ib_cm_event(__entry->event)
)
);
TRACE_EVENT(icm_mad_send_err,
TP_PROTO(
enum ib_cm_state state,
enum ib_wc_status wc_status
),
TP_ARGS(state, wc_status),
TP_STRUCT__entry(
__field(unsigned long, state)
__field(unsigned long, wc_status)
),
TP_fast_assign(
__entry->state = state;
__entry->wc_status = wc_status;
),
TP_printk("state=%s completion status=%s",
show_ib_cm_state(__entry->state),
rdma_show_wc_status(__entry->wc_status)
)
);
#endif /* _TRACE_IB_CMA_H */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
#define TRACE_INCLUDE_FILE cm_trace
#include <trace/define_trace.h>