openmpi/ompi/mca/coll/monitoring/coll_monitoring_component.c

257 lines
13 KiB
C

/*
* Copyright (c) 2016-2017 Inria. All rights reserved.
* Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <ompi_config.h>
#include "coll_monitoring.h"
#include <ompi/constants.h>
#include <ompi/communicator/communicator.h>
#include <ompi/mca/coll/coll.h>
#include <opal/mca/base/mca_base_component_repository.h>
#define MONITORING_SAVE_PREV_COLL_API(__module, __comm, __api) \
do { \
if( NULL != __comm->c_coll->coll_ ## __api ## _module ) { \
__module->real.coll_ ## __api = __comm->c_coll->coll_ ## __api; \
__module->real.coll_ ## __api ## _module = __comm->c_coll->coll_ ## __api ## _module; \
OBJ_RETAIN(__module->real.coll_ ## __api ## _module); \
} else { \
/* If no function previously provided, do not monitor */ \
__module->super.coll_ ## __api = NULL; \
OPAL_MONITORING_PRINT_WARN("COMM \"%s\": No monitoring available for " \
"coll_" # __api, __comm->c_name); \
} \
if( NULL != __comm->c_coll->coll_i ## __api ## _module ) { \
__module->real.coll_i ## __api = __comm->c_coll->coll_i ## __api; \
__module->real.coll_i ## __api ## _module = __comm->c_coll->coll_i ## __api ## _module; \
OBJ_RETAIN(__module->real.coll_i ## __api ## _module); \
} else { \
/* If no function previously provided, do not monitor */ \
__module->super.coll_i ## __api = NULL; \
OPAL_MONITORING_PRINT_WARN("COMM \"%s\": No monitoring available for " \
"coll_i" # __api, __comm->c_name); \
} \
} while(0)
#define MONITORING_RELEASE_PREV_COLL_API(__module, __comm, __api) \
do { \
if( NULL != __module->real.coll_ ## __api ## _module ) { \
if( NULL != __module->real.coll_ ## __api ## _module->coll_module_disable ) { \
__module->real.coll_ ## __api ## _module->coll_module_disable(__module->real.coll_ ## __api ## _module, __comm); \
} \
OBJ_RELEASE(__module->real.coll_ ## __api ## _module); \
__module->real.coll_ ## __api = NULL; \
__module->real.coll_ ## __api ## _module = NULL; \
} \
if( NULL != __module->real.coll_i ## __api ## _module ) { \
if( NULL != __module->real.coll_i ## __api ## _module->coll_module_disable ) { \
__module->real.coll_i ## __api ## _module->coll_module_disable(__module->real.coll_i ## __api ## _module, __comm); \
} \
OBJ_RELEASE(__module->real.coll_i ## __api ## _module); \
__module->real.coll_i ## __api = NULL; \
__module->real.coll_i ## __api ## _module = NULL; \
} \
} while(0)
#define MONITORING_SET_FULL_PREV_COLL_API(m, c, operation) \
do { \
operation(m, c, allgather); \
operation(m, c, allgatherv); \
operation(m, c, allreduce); \
operation(m, c, alltoall); \
operation(m, c, alltoallv); \
operation(m, c, alltoallw); \
operation(m, c, barrier); \
operation(m, c, bcast); \
operation(m, c, exscan); \
operation(m, c, gather); \
operation(m, c, gatherv); \
operation(m, c, reduce); \
operation(m, c, reduce_scatter); \
operation(m, c, reduce_scatter_block); \
operation(m, c, scan); \
operation(m, c, scatter); \
operation(m, c, scatterv); \
operation(m, c, neighbor_allgather); \
operation(m, c, neighbor_allgatherv); \
operation(m, c, neighbor_alltoall); \
operation(m, c, neighbor_alltoallv); \
operation(m, c, neighbor_alltoallw); \
} while(0)
#define MONITORING_SAVE_FULL_PREV_COLL_API(m, c) \
MONITORING_SET_FULL_PREV_COLL_API((m), (c), MONITORING_SAVE_PREV_COLL_API)
#define MONITORING_RELEASE_FULL_PREV_COLL_API(m, c) \
MONITORING_SET_FULL_PREV_COLL_API((m), (c), MONITORING_RELEASE_PREV_COLL_API)
static int mca_coll_monitoring_component_open(void)
{
return OMPI_SUCCESS;
}
static int mca_coll_monitoring_component_close(void)
{
OPAL_MONITORING_PRINT_INFO("coll_module_close");
mca_common_monitoring_finalize();
return OMPI_SUCCESS;
}
static int mca_coll_monitoring_component_init(bool enable_progress_threads,
bool enable_mpi_threads)
{
OPAL_MONITORING_PRINT_INFO("coll_module_init");
return mca_common_monitoring_init();
}
static int mca_coll_monitoring_component_register(void)
{
return OMPI_SUCCESS;
}
static int
mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
{
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
if( 1 == opal_atomic_add_fetch_32(&monitoring_module->is_initialized, 1) ) {
MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm);
monitoring_module->data = mca_common_monitoring_coll_new(comm);
OPAL_MONITORING_PRINT_INFO("coll_module_enabled");
}
return OMPI_SUCCESS;
}
static int
mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
{
mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
if( 0 == opal_atomic_sub_fetch_32(&monitoring_module->is_initialized, 1) ) {
MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm);
mca_common_monitoring_coll_release(monitoring_module->data);
monitoring_module->data = NULL;
OPAL_MONITORING_PRINT_INFO("coll_module_disabled");
}
return OMPI_SUCCESS;
}
static int mca_coll_monitoring_ft_event(int state)
{
switch(state) {
case OPAL_CRS_CHECKPOINT:
case OPAL_CRS_CONTINUE:
case OPAL_CRS_RESTART:
case OPAL_CRS_TERM:
default:
;
}
return OMPI_SUCCESS;
}
static mca_coll_base_module_t*
mca_coll_monitoring_component_query(struct ompi_communicator_t*comm, int*priority)
{
OPAL_MONITORING_PRINT_INFO("coll_module_query");
mca_coll_monitoring_module_t*monitoring_module = OBJ_NEW(mca_coll_monitoring_module_t);
if( NULL == monitoring_module ) return (*priority = -1, NULL);
/* Initialize module functions */
monitoring_module->super.coll_module_enable = mca_coll_monitoring_module_enable;
monitoring_module->super.coll_module_disable = mca_coll_monitoring_module_disable;
monitoring_module->super.ft_event = mca_coll_monitoring_ft_event;
/* Initialise module collectives functions */
/* Blocking functions */
monitoring_module->super.coll_allgather = mca_coll_monitoring_allgather;
monitoring_module->super.coll_allgatherv = mca_coll_monitoring_allgatherv;
monitoring_module->super.coll_allreduce = mca_coll_monitoring_allreduce;
monitoring_module->super.coll_alltoall = mca_coll_monitoring_alltoall;
monitoring_module->super.coll_alltoallv = mca_coll_monitoring_alltoallv;
monitoring_module->super.coll_alltoallw = mca_coll_monitoring_alltoallw;
monitoring_module->super.coll_barrier = mca_coll_monitoring_barrier;
monitoring_module->super.coll_bcast = mca_coll_monitoring_bcast;
monitoring_module->super.coll_exscan = mca_coll_monitoring_exscan;
monitoring_module->super.coll_gather = mca_coll_monitoring_gather;
monitoring_module->super.coll_gatherv = mca_coll_monitoring_gatherv;
monitoring_module->super.coll_reduce = mca_coll_monitoring_reduce;
monitoring_module->super.coll_reduce_scatter = mca_coll_monitoring_reduce_scatter;
monitoring_module->super.coll_reduce_scatter_block = mca_coll_monitoring_reduce_scatter_block;
monitoring_module->super.coll_scan = mca_coll_monitoring_scan;
monitoring_module->super.coll_scatter = mca_coll_monitoring_scatter;
monitoring_module->super.coll_scatterv = mca_coll_monitoring_scatterv;
/* Nonblocking functions */
monitoring_module->super.coll_iallgather = mca_coll_monitoring_iallgather;
monitoring_module->super.coll_iallgatherv = mca_coll_monitoring_iallgatherv;
monitoring_module->super.coll_iallreduce = mca_coll_monitoring_iallreduce;
monitoring_module->super.coll_ialltoall = mca_coll_monitoring_ialltoall;
monitoring_module->super.coll_ialltoallv = mca_coll_monitoring_ialltoallv;
monitoring_module->super.coll_ialltoallw = mca_coll_monitoring_ialltoallw;
monitoring_module->super.coll_ibarrier = mca_coll_monitoring_ibarrier;
monitoring_module->super.coll_ibcast = mca_coll_monitoring_ibcast;
monitoring_module->super.coll_iexscan = mca_coll_monitoring_iexscan;
monitoring_module->super.coll_igather = mca_coll_monitoring_igather;
monitoring_module->super.coll_igatherv = mca_coll_monitoring_igatherv;
monitoring_module->super.coll_ireduce = mca_coll_monitoring_ireduce;
monitoring_module->super.coll_ireduce_scatter = mca_coll_monitoring_ireduce_scatter;
monitoring_module->super.coll_ireduce_scatter_block = mca_coll_monitoring_ireduce_scatter_block;
monitoring_module->super.coll_iscan = mca_coll_monitoring_iscan;
monitoring_module->super.coll_iscatter = mca_coll_monitoring_iscatter;
monitoring_module->super.coll_iscatterv = mca_coll_monitoring_iscatterv;
/* Neighborhood functions */
monitoring_module->super.coll_neighbor_allgather = mca_coll_monitoring_neighbor_allgather;
monitoring_module->super.coll_neighbor_allgatherv = mca_coll_monitoring_neighbor_allgatherv;
monitoring_module->super.coll_neighbor_alltoall = mca_coll_monitoring_neighbor_alltoall;
monitoring_module->super.coll_neighbor_alltoallv = mca_coll_monitoring_neighbor_alltoallv;
monitoring_module->super.coll_neighbor_alltoallw = mca_coll_monitoring_neighbor_alltoallw;
monitoring_module->super.coll_ineighbor_allgather = mca_coll_monitoring_ineighbor_allgather;
monitoring_module->super.coll_ineighbor_allgatherv = mca_coll_monitoring_ineighbor_allgatherv;
monitoring_module->super.coll_ineighbor_alltoall = mca_coll_monitoring_ineighbor_alltoall;
monitoring_module->super.coll_ineighbor_alltoallv = mca_coll_monitoring_ineighbor_alltoallv;
monitoring_module->super.coll_ineighbor_alltoallw = mca_coll_monitoring_ineighbor_alltoallw;
/* Initialization flag */
monitoring_module->is_initialized = 0;
*priority = mca_coll_monitoring_component.priority;
return &(monitoring_module->super);
}
mca_coll_monitoring_component_t mca_coll_monitoring_component = {
.super = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
.collm_version = {
MCA_COLL_BASE_VERSION_2_0_0,
.mca_component_name = "monitoring", /* MCA component name */
MCA_MONITORING_MAKE_VERSION,
.mca_open_component = mca_coll_monitoring_component_open, /* component open */
.mca_close_component = mca_coll_monitoring_component_close, /* component close */
.mca_register_component_params = mca_coll_monitoring_component_register
},
.collm_data = {
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
.collm_init_query = mca_coll_monitoring_component_init,
.collm_comm_query = mca_coll_monitoring_component_query
},
.priority = INT_MAX
};
OBJ_CLASS_INSTANCE(mca_coll_monitoring_module_t,
mca_coll_base_module_t,
NULL,
NULL);