forked from openkylin/openmpi
1452 lines
46 KiB
C
1452 lines
46 KiB
C
|
/*
|
||
|
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||
|
* University Research and Technology
|
||
|
* Corporation. All rights reserved.
|
||
|
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||
|
* of Tennessee Research Foundation. All rights
|
||
|
* reserved.
|
||
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||
|
* University of Stuttgart. All rights reserved.
|
||
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||
|
* All rights reserved.
|
||
|
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
|
||
|
* reserved.
|
||
|
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||
|
* Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved.
|
||
|
* Copyright (c) 2015 Research Organization for Information Science
|
||
|
* and Technology (RIST). All rights reserved.
|
||
|
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||
|
* $COPYRIGHT$
|
||
|
*
|
||
|
* Additional copyrights may follow
|
||
|
*
|
||
|
* $HEADER$
|
||
|
*/
|
||
|
|
||
|
/** @file
|
||
|
*
|
||
|
* OPAL Layer Checkpoint/Restart Runtime functions
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
#include "opal_config.h"
|
||
|
|
||
|
#include <string.h>
|
||
|
#include <errno.h>
|
||
|
#ifdef HAVE_UNISTD_H
|
||
|
#include <unistd.h>
|
||
|
#endif /* HAVE_UNISTD_H */
|
||
|
#ifdef HAVE_FCNTL_H
|
||
|
#include <fcntl.h>
|
||
|
#endif /* HAVE_FCNTL_H */
|
||
|
#ifdef HAVE_SYS_TYPES_H
|
||
|
#include <sys/types.h>
|
||
|
#endif /* HAVE_SYS_TYPES_H */
|
||
|
#ifdef HAVE_SYS_STAT_H
|
||
|
#include <sys/stat.h> /* for mkfifo */
|
||
|
#endif /* HAVE_SYS_STAT_H */
|
||
|
#include <signal.h>
|
||
|
|
||
|
#include "opal/class/opal_object.h"
|
||
|
#include "opal/util/opal_environ.h"
|
||
|
#include "opal/util/show_help.h"
|
||
|
#include "opal/util/output.h"
|
||
|
#include "opal/util/malloc.h"
|
||
|
#include "opal/util/keyval_parse.h"
|
||
|
#include "opal/util/opal_environ.h"
|
||
|
#include "opal/util/argv.h"
|
||
|
#include "opal/memoryhooks/memory.h"
|
||
|
|
||
|
#include "opal/mca/base/base.h"
|
||
|
#include "opal/runtime/opal_cr.h"
|
||
|
#include "opal/runtime/opal.h"
|
||
|
#include "opal/constants.h"
|
||
|
|
||
|
#include "opal/mca/if/base/base.h"
|
||
|
#include "opal/mca/memcpy/base/base.h"
|
||
|
#include "opal/mca/memory/base/base.h"
|
||
|
#include "opal/mca/timer/base/base.h"
|
||
|
|
||
|
#include "opal/threads/mutex.h"
|
||
|
#include "opal/threads/threads.h"
|
||
|
#include "opal/mca/crs/base/base.h"
|
||
|
|
||
|
/******************
|
||
|
* Global Var Decls
|
||
|
******************/
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
static opal_thread_t **opal_cr_debug_free_threads = NULL;
|
||
|
static int opal_cr_debug_num_free_threads = 0;
|
||
|
static int opal_cr_debug_threads_already_waiting = false;
|
||
|
|
||
|
int MPIR_debug_with_checkpoint = 0;
|
||
|
static volatile int MPIR_checkpoint_debug_gate = 0;
|
||
|
|
||
|
int opal_cr_debug_signal = 0;
|
||
|
#endif
|
||
|
|
||
|
bool opal_cr_stall_check = false;
|
||
|
bool opal_cr_currently_stalled = false;
|
||
|
int opal_cr_output = -1;
|
||
|
int opal_cr_verbose = 0;
|
||
|
int opal_cr_initalized = 0;
|
||
|
|
||
|
static double opal_cr_get_time(void);
|
||
|
static void display_indv_timer_core(double diff, char *str);
|
||
|
static double timer_start[OPAL_CR_TIMER_MAX];
|
||
|
bool opal_cr_timing_barrier_enabled = false;
|
||
|
bool opal_cr_timing_enabled = false;
|
||
|
int opal_cr_timing_my_rank = 0;
|
||
|
int opal_cr_timing_target_rank = 0;
|
||
|
|
||
|
/******************
|
||
|
* Local Functions & Var Decls
|
||
|
******************/
|
||
|
static int extract_env_vars(int prev_pid, char * file_name);
|
||
|
|
||
|
static void opal_cr_sigpipe_debug_signal_handler (int signo);
|
||
|
|
||
|
static opal_cr_user_inc_callback_fn_t cur_user_coord_callback[OPAL_CR_INC_MAX] = {NULL};
|
||
|
static opal_cr_coord_callback_fn_t cur_coord_callback = NULL;
|
||
|
static opal_cr_notify_callback_fn_t cur_notify_callback = NULL;
|
||
|
|
||
|
static int core_prev_pid = 0;
|
||
|
|
||
|
/******************
|
||
|
* Interface Functions & Vars
|
||
|
******************/
|
||
|
char * opal_cr_pipe_dir = NULL;
|
||
|
int opal_cr_entry_point_signal = 0;
|
||
|
bool opal_cr_is_enabled = true;
|
||
|
bool opal_cr_is_tool = false;
|
||
|
|
||
|
/* Current checkpoint state */
|
||
|
int opal_cr_checkpointing_state = OPAL_CR_STATUS_NONE;
|
||
|
|
||
|
/* Current checkpoint request channel state */
|
||
|
int opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
||
|
|
||
|
static bool opal_cr_debug_sigpipe = false;
|
||
|
|
||
|
bool opal_cr_continue_like_restart = false;
|
||
|
|
||
|
#if OPAL_ENABLE_FT_THREAD == 1
/*****************
 * Threading Functions and Variables
 *****************/
static void* opal_cr_thread_fn(opal_object_t *obj);
bool opal_cr_thread_is_done = false;
bool opal_cr_thread_is_active = false;
bool opal_cr_thread_in_library = false;
bool opal_cr_thread_use_if_avail = true;
int32_t opal_cr_thread_num_in_library = 0;
int opal_cr_thread_sleep_check = 0;
int opal_cr_thread_sleep_wait = 0;
opal_thread_t opal_cr_thread;
opal_mutex_t opal_cr_thread_lock;
#if 0
#define OPAL_CR_LOCK() opal_cr_thread_in_library = true; opal_mutex_lock(&opal_cr_thread_lock);
#define OPAL_CR_UNLOCK() opal_cr_thread_in_library = false; opal_mutex_unlock(&opal_cr_thread_lock);
#define OPAL_CR_THREAD_LOCK() opal_mutex_lock(&opal_cr_thread_lock);
#define OPAL_CR_THREAD_UNLOCK() opal_mutex_unlock(&opal_cr_thread_lock);
#else
/* This technique will potentially starve the thread, but that is OK since
 * it is only there as support for when the process is not in the MPI library
 *
 * opal_cr_thread_num_in_library doubles as the lock word:
 *  - bit 0 (ThreadFlag) is set while the C/R thread holds the lock,
 *  - every process-side OPAL_CR_LOCK() adds ProcInc (2) to it.
 */
static const uint32_t ThreadFlag = 0x1;
static const uint32_t ProcInc = 0x2;

/* Process side: announce entry, then spin while the C/R thread owns the flag */
#define OPAL_CR_LOCK()                                                      \
 {                                                                          \
    opal_cr_thread_in_library = true;                                       \
    OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, ProcInc);       \
    while( (opal_cr_thread_num_in_library & ThreadFlag ) != 0 ) {           \
        sched_yield();                                                      \
    }                                                                       \
 }
/* Process side: announce exit; clear the in-library hint once the counter drains */
#define OPAL_CR_UNLOCK()                                                    \
 {                                                                          \
    OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ProcInc);      \
    if( opal_cr_thread_num_in_library <= 0 ) {                              \
        opal_cr_thread_in_library = false;                                  \
    }                                                                       \
 }
/* Thread side: take the lock only when the counter is exactly 0 (nobody in
 * the library), or give up if the thread has been told to stop.
 */
#define OPAL_CR_THREAD_LOCK()                                               \
 {                                                                          \
    int32_t _tmp_value = 0;                                                 \
    while(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_32 (&opal_cr_thread_num_in_library, &_tmp_value, ThreadFlag)) { \
        if( !opal_cr_thread_is_active && opal_cr_thread_is_done) {          \
            break;                                                          \
        }                                                                   \
        sched_yield();                                                      \
        usleep(opal_cr_thread_sleep_check);                                 \
        /* A failed compare-exchange may store the observed counter in   */ \
        /* _tmp_value (C11-style semantics). Reset the expected value so */ \
        /* the next attempt still only succeeds when the counter is 0.   */ \
        _tmp_value = 0;                                                     \
    }                                                                       \
 }
/* Thread side: drop the thread flag again */
#define OPAL_CR_THREAD_UNLOCK()                                             \
 {                                                                          \
    OPAL_THREAD_ADD_FETCH32(&opal_cr_thread_num_in_library, -ThreadFlag);   \
 }
#endif

#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
||
|
|
||
|
int opal_cr_set_enabled(bool en)
|
||
|
{
|
||
|
opal_cr_is_enabled = en;
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
static int opal_cr_register (void)
|
||
|
{
|
||
|
int ret;
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
int t;
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* Some startup MCA parameters
|
||
|
*/
|
||
|
ret = mca_base_var_register ("opal", "opal", "cr", "verbose",
|
||
|
"Verbose output level for the runtime OPAL Checkpoint/Restart functionality",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_LOCAL,
|
||
|
&opal_cr_verbose);
|
||
|
if (0 > ret) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
opal_cr_is_enabled = false;
|
||
|
(void) mca_base_var_register("opal", "ft", "cr", "enabled",
|
||
|
"Enable fault tolerance for this program",
|
||
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_is_enabled);
|
||
|
|
||
|
opal_cr_timing_enabled = false;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "enable_timer",
|
||
|
"Enable Checkpoint timer (Default: Disabled)",
|
||
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_timing_enabled);
|
||
|
|
||
|
opal_cr_timing_barrier_enabled = false;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "enable_timer_barrier",
|
||
|
"Enable Checkpoint timer Barrier. Must have opal_cr_enable_timer set. (Default: Disabled)",
|
||
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, opal_cr_timing_enabled ? MCA_BASE_VAR_FLAG_SETTABLE : 0,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_timing_barrier_enabled);
|
||
|
opal_cr_timing_barrier_enabled = opal_cr_timing_barrier_enabled && opal_cr_timing_enabled;
|
||
|
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "timer_target_rank",
|
||
|
"Target Rank for the timer (Default: 0)",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_timing_target_rank);
|
||
|
|
||
|
#if OPAL_ENABLE_FT_THREAD == 1
|
||
|
opal_cr_thread_use_if_avail = false;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "use_thread",
|
||
|
"Use an async thread to checkpoint this program (Default: Disabled)",
|
||
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_thread_use_if_avail);
|
||
|
|
||
|
opal_cr_thread_sleep_check = 0;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "thread_sleep_check",
|
||
|
"Time to sleep between checking for a checkpoint (Default: 0)",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_thread_sleep_check);
|
||
|
|
||
|
opal_cr_thread_sleep_wait = 100;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "thread_sleep_wait",
|
||
|
"Time to sleep waiting for process to exit MPI library (Default: 1000)",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_thread_sleep_wait);
|
||
|
#endif
|
||
|
|
||
|
opal_cr_is_tool = false;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "is_tool",
|
||
|
"Is this a tool program, meaning does it require a fully operational OPAL or just enough to exec.",
|
||
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_is_tool);
|
||
|
|
||
|
#ifndef __WINDOWS__
|
||
|
opal_cr_entry_point_signal = SIGUSR1;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "signal",
|
||
|
"Checkpoint/Restart signal used to initialize an OPAL Only checkpoint of a program",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_entry_point_signal);
|
||
|
|
||
|
opal_cr_debug_sigpipe = false;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "debug_sigpipe",
|
||
|
"Activate a signal handler for debugging SIGPIPE Errors that can happen on restart. (Default: Disabled)",
|
||
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_debug_sigpipe);
|
||
|
#else
|
||
|
opal_cr_is_tool = true; /* no support for CR on Windows yet */
|
||
|
#endif /* __WINDOWS__ */
|
||
|
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
MPIR_debug_with_checkpoint = 0;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "enable_crdebug",
|
||
|
"Enable checkpoint/restart debugging",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&MPIR_debug_with_checkpoint);
|
||
|
|
||
|
opal_cr_debug_num_free_threads = 3;
|
||
|
opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads );
|
||
|
for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) {
|
||
|
opal_cr_debug_free_threads[t] = NULL;
|
||
|
}
|
||
|
|
||
|
opal_cr_debug_signal = SIGTSTP;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "crdebug_signal",
|
||
|
"Checkpoint/Restart signal used to hold threads when debugging",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_debug_signal);
|
||
|
#endif
|
||
|
|
||
|
opal_cr_pipe_dir = (char *) opal_tmp_directory();
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "tmp_dir",
|
||
|
"Temporary directory to place rendezvous files for a checkpoint",
|
||
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&opal_cr_pipe_dir);
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
|
||
|
int opal_cr_init(void )
|
||
|
{
|
||
|
int ret, exit_status = OPAL_SUCCESS;
|
||
|
opal_cr_coord_callback_fn_t prev_coord_func;
|
||
|
|
||
|
if( ++opal_cr_initalized != 1 ) {
|
||
|
if( opal_cr_initalized < 1 ) {
|
||
|
exit_status = OPAL_ERROR;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
exit_status = OPAL_SUCCESS;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
|
||
|
ret = opal_cr_register ();
|
||
|
if (OPAL_SUCCESS != ret) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
if(0 != opal_cr_verbose) {
|
||
|
opal_cr_output = opal_output_open(NULL);
|
||
|
opal_output_set_verbosity(opal_cr_output, opal_cr_verbose);
|
||
|
}
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: Verbose Level: %d",
|
||
|
opal_cr_verbose);
|
||
|
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: FT Enabled: %s",
|
||
|
opal_cr_is_enabled ? "true" : "false");
|
||
|
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: Is a tool program: %s",
|
||
|
opal_cr_is_tool ? "true" : "false");
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: Debug SIGPIPE: %d (%s)",
|
||
|
opal_cr_verbose, (opal_cr_debug_sigpipe ? "True" : "False"));
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: Checkpoint Signal: %d",
|
||
|
opal_cr_entry_point_signal);
|
||
|
|
||
|
#if OPAL_ENABLE_FT_THREAD == 1
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: FT Use thread: %s",
|
||
|
opal_cr_thread_use_if_avail ? "true" : "false");
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: FT thread sleep: check = %d, wait = %d",
|
||
|
opal_cr_thread_sleep_check, opal_cr_thread_sleep_wait);
|
||
|
|
||
|
/* If we have a thread, then attach the SIGPIPE signal handler there since
|
||
|
* it is most likely to be the one that needs it.
|
||
|
*/
|
||
|
if( opal_cr_debug_sigpipe && !opal_cr_thread_use_if_avail ) {
|
||
|
if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
|
||
|
;
|
||
|
}
|
||
|
}
|
||
|
#else
|
||
|
if( opal_cr_debug_sigpipe ) {
|
||
|
if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
|
||
|
;
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: C/R Debugging Enabled [%s]\n",
|
||
|
(MPIR_debug_with_checkpoint ? "True": "False"));
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: Checkpoint Signal (Debug): %d",
|
||
|
opal_cr_debug_signal);
|
||
|
|
||
|
if( SIG_ERR == signal(opal_cr_debug_signal, MPIR_checkpoint_debugger_signal_handler) ) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"opal_cr: init: Failed to register C/R debug signal (%d)",
|
||
|
opal_cr_debug_signal);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: Temp Directory: %s",
|
||
|
opal_cr_pipe_dir);
|
||
|
|
||
|
if( !opal_cr_is_tool ) {
|
||
|
/* Register the OPAL interlevel coordination callback */
|
||
|
opal_cr_reg_coord_callback(opal_cr_coord, &prev_coord_func);
|
||
|
|
||
|
opal_cr_stall_check = false;
|
||
|
opal_cr_currently_stalled = false;
|
||
|
|
||
|
} /* End opal_cr_is_tool = true */
|
||
|
|
||
|
/*
|
||
|
* If fault tolerance was not compiled in then
|
||
|
* we need to make sure that the listener thread is active to tell
|
||
|
* the tools that this is not a checkpointable job.
|
||
|
* We don't need the CRS framework to be initalized.
|
||
|
*/
|
||
|
#if OPAL_ENABLE_FT_CR == 1
|
||
|
/*
|
||
|
* Open the checkpoint / restart service components
|
||
|
*/
|
||
|
if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_crs_base_framework, 0))) {
|
||
|
opal_show_help( "help-opal-runtime.txt",
|
||
|
"opal_cr_init:no-crs", true,
|
||
|
"opal_crs_base_open", ret );
|
||
|
exit_status = ret;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
|
||
|
if (OPAL_SUCCESS != (ret = opal_crs_base_select())) {
|
||
|
opal_show_help( "help-opal-runtime.txt",
|
||
|
"opal_cr_init:no-crs", true,
|
||
|
"opal_crs_base_select", ret );
|
||
|
exit_status = ret;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#if OPAL_ENABLE_FT_THREAD == 1
|
||
|
if( !opal_cr_is_tool && opal_cr_thread_use_if_avail) {
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: starting the thread\n");
|
||
|
|
||
|
/* JJH: We really do need this line below since it enables
|
||
|
* actual locks for threads. However currently the
|
||
|
* upper layers will deadlock if it is enabled.
|
||
|
* So hack around the problem for now, while working
|
||
|
* on a complete solution. See ticket #2741 for more
|
||
|
* details.
|
||
|
* opal_set_using_threads(true);
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* Start the thread
|
||
|
*/
|
||
|
OBJ_CONSTRUCT(&opal_cr_thread, opal_thread_t);
|
||
|
OBJ_CONSTRUCT(&opal_cr_thread_lock, opal_mutex_t);
|
||
|
|
||
|
opal_cr_thread_is_done = false;
|
||
|
opal_cr_thread_is_active = false;
|
||
|
opal_cr_thread_in_library = false;
|
||
|
opal_cr_thread_num_in_library = 0;
|
||
|
|
||
|
opal_cr_thread.t_run = opal_cr_thread_fn;
|
||
|
opal_cr_thread.t_arg = NULL;
|
||
|
opal_thread_start(&opal_cr_thread);
|
||
|
|
||
|
} /* End opal_cr_is_tool = true */
|
||
|
else {
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: *Not* Using C/R thread\n");
|
||
|
}
|
||
|
#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
||
|
|
||
|
cleanup:
|
||
|
return exit_status;
|
||
|
}
|
||
|
|
||
|
int opal_cr_finalize(void)
|
||
|
{
|
||
|
int exit_status = OPAL_SUCCESS;
|
||
|
|
||
|
if( --opal_cr_initalized != 0 ) {
|
||
|
if( opal_cr_initalized < 0 ) {
|
||
|
return OPAL_ERROR;
|
||
|
}
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
if( !opal_cr_is_tool ) {
|
||
|
#if OPAL_ENABLE_FT_THREAD == 1
|
||
|
if( opal_cr_thread_use_if_avail ) {
|
||
|
void *data;
|
||
|
/*
|
||
|
* Stop the thread
|
||
|
*/
|
||
|
opal_cr_thread_is_done = true;
|
||
|
opal_cr_thread_is_active = false;
|
||
|
opal_cr_thread_in_library = true;
|
||
|
|
||
|
opal_thread_join(&opal_cr_thread, &data);
|
||
|
OBJ_DESTRUCT(&opal_cr_thread);
|
||
|
OBJ_DESTRUCT(&opal_cr_thread_lock);
|
||
|
}
|
||
|
#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
||
|
|
||
|
/* Nothing to do for just process notifications */
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM;
|
||
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_TERM;
|
||
|
}
|
||
|
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
if( NULL != opal_cr_debug_free_threads ) {
|
||
|
free( opal_cr_debug_free_threads );
|
||
|
opal_cr_debug_free_threads = NULL;
|
||
|
}
|
||
|
opal_cr_debug_num_free_threads = 0;
|
||
|
#endif
|
||
|
|
||
|
if (NULL != opal_cr_pipe_dir) {
|
||
|
free(opal_cr_pipe_dir);
|
||
|
opal_cr_pipe_dir = NULL;
|
||
|
}
|
||
|
|
||
|
#if OPAL_ENABLE_FT_CR == 1
|
||
|
/*
|
||
|
* Close the checkpoint / restart service components
|
||
|
*/
|
||
|
(void) mca_base_framework_close(&opal_crs_base_framework);
|
||
|
#endif
|
||
|
|
||
|
return exit_status;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Check if a checkpoint request needs to be operated upon
|
||
|
*/
|
||
|
void opal_cr_test_if_checkpoint_ready(void)
|
||
|
{
|
||
|
int ret;
|
||
|
|
||
|
if( opal_cr_currently_stalled) {
|
||
|
opal_output_verbose(20, opal_cr_output,
|
||
|
"opal_cr:opal_test_if_ready: JUMPING to Post Stall stage");
|
||
|
goto STAGE_1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If there is no checkpoint request to act on
|
||
|
* then just return
|
||
|
*/
|
||
|
if(OPAL_CR_STATUS_REQUESTED != opal_cr_checkpoint_request ) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If we are currently checkpointing:
|
||
|
* - If a request is pending then cancel it
|
||
|
* - o.w., skip it.
|
||
|
*/
|
||
|
if(OPAL_CR_STATUS_RUNNING == opal_cr_checkpointing_state ) {
|
||
|
if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_IN_PROGRESS) ) ) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"Error: opal_cr: test_if_checkpoint_ready: Respond [In Progress] Failed. (%d)",
|
||
|
ret);
|
||
|
}
|
||
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If no CRS module is loaded return an error
|
||
|
*/
|
||
|
if (NULL == opal_crs.crs_checkpoint ) {
|
||
|
if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_NULL) ) ) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"Error: opal_cr: test_if_checkpoint_ready: Respond [Not Able/NULL] Failed. (%d)",
|
||
|
ret);
|
||
|
}
|
||
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Start the checkpoint
|
||
|
*/
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RUNNING;
|
||
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
||
|
|
||
|
STAGE_1:
|
||
|
if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_START) ) ) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"Error: opal_cr: test_if_checkpoint_ready: Respond [Start Ckpt] Failed. (%d)",
|
||
|
ret);
|
||
|
}
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/*******************************
|
||
|
* Notification Routines
|
||
|
*******************************/
|
||
|
int opal_cr_inc_core_prep(void)
|
||
|
{
|
||
|
int ret;
|
||
|
|
||
|
/*
|
||
|
* Call User Level INC
|
||
|
*/
|
||
|
if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_PRE_MPI,
|
||
|
OPAL_CR_INC_STATE_PREPARE)) ) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Use the registered coordination routine
|
||
|
*/
|
||
|
if(OPAL_SUCCESS != (ret = cur_coord_callback(OPAL_CRS_CHECKPOINT)) ) {
|
||
|
if ( OPAL_EXISTS != ret ) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"opal_cr: inc_core: Error: cur_coord_callback(%d) failed! %d\n",
|
||
|
OPAL_CRS_CHECKPOINT, ret);
|
||
|
}
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Call User Level INC
|
||
|
*/
|
||
|
if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_PRE_CRS_POST_MPI,
|
||
|
OPAL_CR_INC_STATE_PREPARE)) ) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
core_prev_pid = getpid();
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
int opal_cr_inc_core_ckpt(pid_t pid,
|
||
|
opal_crs_base_snapshot_t *snapshot,
|
||
|
opal_crs_base_ckpt_options_t *options,
|
||
|
int *state)
|
||
|
{
|
||
|
int ret, exit_status = OPAL_SUCCESS;
|
||
|
|
||
|
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE0);
|
||
|
if(OPAL_SUCCESS != (ret = opal_crs.crs_checkpoint(pid,
|
||
|
snapshot,
|
||
|
options,
|
||
|
(opal_crs_state_type_t *)state))) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"opal_cr: inc_core: Error: The checkpoint failed. %d\n", ret);
|
||
|
exit_status = ret;
|
||
|
}
|
||
|
|
||
|
if(*state == OPAL_CRS_CONTINUE) {
|
||
|
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1);
|
||
|
|
||
|
if(options->term) {
|
||
|
*state = OPAL_CRS_TERM;
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM;
|
||
|
} else {
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_CONTINUE;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
options->term = false;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If restarting read environment stuff that opal-restart left us.
|
||
|
*/
|
||
|
if(*state == OPAL_CRS_RESTART) {
|
||
|
opal_cr_refresh_environ(core_prev_pid);
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_PRE;
|
||
|
}
|
||
|
|
||
|
return exit_status;
|
||
|
}
|
||
|
|
||
|
int opal_cr_inc_core_recover(int state)
|
||
|
{
|
||
|
int ret;
|
||
|
opal_cr_user_inc_callback_state_t cb_state;
|
||
|
|
||
|
if( opal_cr_checkpointing_state != OPAL_CR_STATUS_TERM &&
|
||
|
opal_cr_checkpointing_state != OPAL_CR_STATUS_CONTINUE &&
|
||
|
opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_PRE &&
|
||
|
opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_POST ) {
|
||
|
|
||
|
if(state == OPAL_CRS_CONTINUE) {
|
||
|
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1);
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_CONTINUE;
|
||
|
}
|
||
|
/*
|
||
|
* If restarting read environment stuff that opal-restart left us.
|
||
|
*/
|
||
|
else if(state == OPAL_CRS_RESTART) {
|
||
|
opal_cr_refresh_environ(core_prev_pid);
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_PRE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Call User Level INC
|
||
|
*/
|
||
|
if( OPAL_CRS_CONTINUE == state ) {
|
||
|
cb_state = OPAL_CR_INC_STATE_CONTINUE;
|
||
|
}
|
||
|
else if( OPAL_CRS_RESTART == state ) {
|
||
|
cb_state = OPAL_CR_INC_STATE_RESTART;
|
||
|
}
|
||
|
else {
|
||
|
cb_state = OPAL_CR_INC_STATE_ERROR;
|
||
|
}
|
||
|
|
||
|
if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_PRE_MPI,
|
||
|
cb_state)) ) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Use the registered coordination routine
|
||
|
*/
|
||
|
if(OPAL_SUCCESS != (ret = cur_coord_callback(state)) ) {
|
||
|
if ( OPAL_EXISTS != ret ) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"opal_cr: inc_core: Error: cur_coord_callback(%d) failed! %d\n",
|
||
|
state, ret);
|
||
|
}
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
if(OPAL_SUCCESS != (ret = ompi_trigger_user_inc_callback(OPAL_CR_INC_POST_CRS_POST_MPI,
|
||
|
cb_state)) ) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
opal_cr_debug_clear_current_ckpt_thread();
|
||
|
#endif
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
int opal_cr_inc_core(pid_t pid,
|
||
|
opal_crs_base_snapshot_t *snapshot,
|
||
|
opal_crs_base_ckpt_options_t *options,
|
||
|
int *state)
|
||
|
{
|
||
|
int ret, exit_status = OPAL_SUCCESS;
|
||
|
|
||
|
/*
|
||
|
* INC: Prepare stack using the registered coordination routine
|
||
|
*/
|
||
|
if(OPAL_SUCCESS != (ret = opal_cr_inc_core_prep() ) ) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* INC: Take the checkpoint
|
||
|
*/
|
||
|
if(OPAL_SUCCESS != (ret = opal_cr_inc_core_ckpt(pid, snapshot, options, state) ) ) {
|
||
|
exit_status = ret;
|
||
|
/* Don't return here since we want to restart the OPAL level stuff */
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* INC: Recover stack using the registered coordination routine
|
||
|
*/
|
||
|
if(OPAL_SUCCESS != (ret = opal_cr_inc_core_recover(*state) ) ) {
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
return exit_status;
|
||
|
}
|
||
|
|
||
|
/*******************************
|
||
|
* Coordination Routines
|
||
|
*******************************/
|
||
|
/**
|
||
|
* Current Coordination callback routines
|
||
|
*/
|
||
|
int opal_cr_coord(int state)
|
||
|
{
|
||
|
if(OPAL_CRS_CHECKPOINT == state) {
|
||
|
/* Do Checkpoint Phase work */
|
||
|
}
|
||
|
else if (OPAL_CRS_CONTINUE == state ) {
|
||
|
/* Do Continue Phase work */
|
||
|
}
|
||
|
else if (OPAL_CRS_RESTART == state ) {
|
||
|
/* Do Restart Phase work */
|
||
|
|
||
|
/*
|
||
|
* Re-initialize the event engine
|
||
|
* Otherwise it may/will use stale file descriptors which will disrupt
|
||
|
* the intended users of the soon-to-be newly assigned file descriptors.
|
||
|
*/
|
||
|
opal_event_reinit(opal_sync_event_base);
|
||
|
|
||
|
/*
|
||
|
* Flush if() functionality, since it caches system specific info.
|
||
|
*/
|
||
|
(void) mca_base_framework_close(&opal_if_base_framework);
|
||
|
/* Since opal_ifinit() is not exposed, the necessary
|
||
|
* functions will call it when needed. Just make sure we
|
||
|
* finalized this code so we don't get old socket addrs.
|
||
|
*/
|
||
|
opal_output_reopen_all();
|
||
|
}
|
||
|
else if (OPAL_CRS_TERM == state ) {
|
||
|
/* Do Continue Phase work in prep to terminate the application */
|
||
|
}
|
||
|
else {
|
||
|
/* We must have been in an error state from the checkpoint
|
||
|
* recreate everything, as in the Continue Phase
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Here we are returning to either:
|
||
|
* - [orte | ompi]_notify()
|
||
|
*/
|
||
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_POST;
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
/*
 * Install a new notification callback.
 *
 * new_func:  callback to install (stored as-is).
 * prev_func: receives the callback that was installed before, or NULL if
 *            there was none. Must point to valid storage.
 *
 * Always returns OPAL_SUCCESS.
 */
int opal_cr_reg_notify_callback(opal_cr_notify_callback_fn_t  new_func,
                                opal_cr_notify_callback_fn_t *prev_func)
{
    /* Hand back the old callback (NULL when none was set), then swap */
    *prev_func          = cur_notify_callback;
    cur_notify_callback = new_func;

    return OPAL_SUCCESS;
}
|
||
|
|
||
|
int opal_cr_user_inc_register_callback(opal_cr_user_inc_callback_event_t event,
|
||
|
opal_cr_user_inc_callback_fn_t function,
|
||
|
opal_cr_user_inc_callback_fn_t *prev_function)
|
||
|
{
|
||
|
if (event >= OPAL_CR_INC_MAX) {
|
||
|
return OPAL_ERROR;
|
||
|
}
|
||
|
|
||
|
if( NULL != cur_user_coord_callback[event] ) {
|
||
|
*prev_function = cur_user_coord_callback[event];
|
||
|
} else {
|
||
|
*prev_function = NULL;
|
||
|
}
|
||
|
|
||
|
cur_user_coord_callback[event] = function;
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
int ompi_trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event,
|
||
|
opal_cr_user_inc_callback_state_t state)
|
||
|
{
|
||
|
if( NULL == cur_user_coord_callback[event] ) {
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
if (event >= OPAL_CR_INC_MAX) {
|
||
|
return OPAL_ERROR;
|
||
|
}
|
||
|
|
||
|
return ((cur_user_coord_callback[event])(event, state));
|
||
|
}
|
||
|
|
||
|
/*
 * Install a new inter-level coordination callback.
 *
 * new_func:  callback to install (stored as-is).
 * prev_func: receives the callback that was installed before, or NULL if
 *            there was none. Must point to valid storage.
 *
 * Always returns OPAL_SUCCESS.
 */
int opal_cr_reg_coord_callback(opal_cr_coord_callback_fn_t  new_func,
                               opal_cr_coord_callback_fn_t *prev_func)
{
    /* Hand back the old callback (NULL when none was set), then swap */
    *prev_func         = cur_coord_callback;
    cur_coord_callback = new_func;

    return OPAL_SUCCESS;
}
|
||
|
|
||
|
int opal_cr_refresh_environ(int prev_pid) {
|
||
|
char *file_name;
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
char *tmp;
|
||
|
#endif
|
||
|
struct stat file_status;
|
||
|
|
||
|
if( 0 >= prev_pid ) {
|
||
|
prev_pid = getpid();
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Make sure the file exists. If it doesn't then this means 2 things:
|
||
|
* 1) We have already executed this function, and
|
||
|
* 2) The file has been deleted on the previous round.
|
||
|
*/
|
||
|
asprintf(&file_name, "%s/%s-%d", opal_tmp_directory(), OPAL_CR_BASE_ENV_NAME, prev_pid);
|
||
|
if (NULL == file_name) {
|
||
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
||
|
}
|
||
|
if(0 != stat(file_name, &file_status) ){
|
||
|
free(file_name);
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
mca_base_var_env_name ("opal_cr_enable_crdebug", &tmp);
|
||
|
opal_unsetenv(tmp, &environ);
|
||
|
free (tmp);
|
||
|
#endif
|
||
|
|
||
|
extract_env_vars(prev_pid, file_name);
|
||
|
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
MPIR_debug_with_checkpoint = 0;
|
||
|
(void) mca_base_var_register ("opal", "opal", "cr", "enable_crdebug",
|
||
|
"Enable checkpoint/restart debugging",
|
||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||
|
OPAL_INFO_LVL_8, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||
|
&MPIR_debug_with_checkpoint);
|
||
|
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: init: C/R Debugging Enabled [%s] (refresh)\n",
|
||
|
(MPIR_debug_with_checkpoint ? "True": "False"));
|
||
|
#endif
|
||
|
|
||
|
free(file_name);
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Extract environment variables from a saved file
|
||
|
* and place them in the environment.
|
||
|
*/
|
||
|
static int extract_env_vars(int prev_pid, char * file_name)
|
||
|
{
|
||
|
int exit_status = OPAL_SUCCESS;
|
||
|
FILE *env_data = NULL;
|
||
|
int len = OPAL_PATH_MAX;
|
||
|
char * tmp_str = NULL;
|
||
|
|
||
|
if( 0 >= prev_pid ) {
|
||
|
opal_output(opal_cr_output,
|
||
|
"opal_cr: extract_env_vars: Invalid PID (%d)\n",
|
||
|
prev_pid);
|
||
|
exit_status = OPAL_ERROR;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
|
||
|
if (NULL == (env_data = fopen(file_name, "r")) ) {
|
||
|
exit_status = OPAL_ERROR;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
|
||
|
tmp_str = (char *) malloc(sizeof(char) * OPAL_PATH_MAX);
|
||
|
if( NULL == tmp_str) {
|
||
|
exit_status = OPAL_ERR_OUT_OF_RESOURCE;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
/* Extract an env var */
|
||
|
while(!feof(env_data) ) {
|
||
|
char **t_set = NULL;
|
||
|
|
||
|
if( NULL == fgets(tmp_str, OPAL_PATH_MAX, env_data) ) {
|
||
|
exit_status = OPAL_ERROR;
|
||
|
goto cleanup;
|
||
|
}
|
||
|
len = strlen(tmp_str);
|
||
|
if(tmp_str[len - 1] == '\n') {
|
||
|
tmp_str[len - 1] = '\0';
|
||
|
} else {
|
||
|
opal_output(opal_cr_output,
|
||
|
"opal_cr: extract_env_vars: Error: Parameter too long (%s)\n",
|
||
|
tmp_str);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if( NULL == (t_set = opal_argv_split(tmp_str, '=')) ) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
opal_setenv(t_set[0], t_set[1], true, &environ);
|
||
|
|
||
|
opal_argv_free(t_set);
|
||
|
}
|
||
|
|
||
|
cleanup:
|
||
|
if( NULL != env_data ) {
|
||
|
fclose(env_data);
|
||
|
}
|
||
|
unlink(file_name);
|
||
|
|
||
|
if( NULL != tmp_str ){
|
||
|
free(tmp_str);
|
||
|
}
|
||
|
|
||
|
return exit_status;
|
||
|
}
|
||
|
|
||
|
/*****************************************
|
||
|
* OPAL CR Entry Point Functionality
|
||
|
*****************************************/
|
||
|
/*
|
||
|
* Used only for debugging SIGPIPE problems
|
||
|
*/
|
||
|
static void opal_cr_sigpipe_debug_signal_handler (int signo)
|
||
|
{
|
||
|
int sleeper = 1;
|
||
|
|
||
|
if( !opal_cr_debug_sigpipe ) {
|
||
|
opal_output_verbose(10, opal_cr_output,
|
||
|
"opal_cr: sigpipe_debug: Debug SIGPIPE Not enabled :(\n");
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
opal_output(0,
|
||
|
"opal_cr: sigpipe_debug: Debug SIGPIPE [%d]: PID (%d)\n",
|
||
|
signo, getpid());
|
||
|
while(sleeper == 1 ) {
|
||
|
sleep(1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if OPAL_ENABLE_FT_THREAD == 1
|
||
|
/*
 * Body of the checkpoint/restart helper thread.
 *
 * The thread registers itself with the OPAL CRS (when a registration hook is
 * provided), idles until it is activated or told it is done, and then
 * alternates between polling for checkpoint requests and sleeping, depending
 * on opal_cr_thread_in_library.
 *
 * obj: unused here.
 *
 * Always returns NULL.
 */
static void* opal_cr_thread_fn(opal_object_t *obj)
{
    /* Sanity check: nothing to do when the thread is disabled. */
    if (!opal_cr_thread_use_if_avail) {
        return NULL;
    }

    /* Optionally hook SIGPIPE for debugging; an install failure is ignored. */
    if (opal_cr_debug_sigpipe &&
        SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler)) {
        ;
    }

    /* Register this thread with the OPAL CRS, if it wants to know. */
    if (NULL != opal_crs.crs_reg_thread &&
        OPAL_SUCCESS != opal_crs.crs_reg_thread()) {
        opal_output(0, "Error: Thread registration failed\n");
        return NULL;
    }

#if OPAL_ENABLE_CRDEBUG == 1
    opal_cr_debug_free_threads[1] = opal_thread_get_self();
#endif

    /* Idle until somebody activates us, or shuts us down first. */
    while (!(opal_cr_thread_is_active || opal_cr_thread_is_done)) {
        sched_yield();
    }

    if (opal_cr_thread_is_done) {
        return NULL;
    }

    /* Main service loop: runs for as long as the thread stays active. */
    for (;;) {
        if (!opal_cr_thread_is_active || opal_cr_thread_is_done) {
            break;
        }

        /*
         * Polling phase: while opal_cr_thread_in_library is clear, keep
         * testing for checkpoint requests with the thread lock held.
         */
        OPAL_CR_THREAD_LOCK();

        while (!opal_cr_thread_in_library) {
            sched_yield();
            usleep(opal_cr_thread_sleep_check);

            OPAL_CR_TEST_CHECKPOINT_READY();
            /* Sanity check: test once more if we are flagged as stalled. */
            if (OPAL_UNLIKELY(opal_cr_currently_stalled)) {
                OPAL_CR_TEST_CHECKPOINT_READY();
            }
        }

        /* Waiting phase: release the lock and sleep while the flag is set. */
        OPAL_CR_THREAD_UNLOCK();

        while (opal_cr_thread_in_library && opal_cr_thread_is_active) {
            usleep(opal_cr_thread_sleep_wait);
        }
    }

    return NULL;
}
|
||
|
|
||
|
void opal_cr_thread_init_library(void)
|
||
|
{
|
||
|
if( !opal_cr_thread_use_if_avail ) {
|
||
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
||
|
} else {
|
||
|
/* Activate the CR Thread */
|
||
|
opal_cr_thread_in_library = false;
|
||
|
opal_cr_thread_is_done = false;
|
||
|
opal_cr_thread_is_active = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void opal_cr_thread_finalize_library(void)
|
||
|
{
|
||
|
if( !opal_cr_thread_use_if_avail ) {
|
||
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
||
|
} else {
|
||
|
/* Deactivate the CR Thread */
|
||
|
opal_cr_thread_is_done = true;
|
||
|
opal_cr_thread_is_active = false;
|
||
|
OPAL_CR_LOCK();
|
||
|
opal_cr_thread_in_library = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void opal_cr_thread_abort_library(void)
|
||
|
{
|
||
|
if( !opal_cr_thread_use_if_avail ) {
|
||
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
||
|
} else {
|
||
|
/* Deactivate the CR Thread */
|
||
|
opal_cr_thread_is_done = true;
|
||
|
opal_cr_thread_is_active = false;
|
||
|
OPAL_CR_LOCK();
|
||
|
opal_cr_thread_in_library = true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void opal_cr_thread_enter_library(void)
|
||
|
{
|
||
|
if( !opal_cr_thread_use_if_avail ) {
|
||
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
||
|
} else {
|
||
|
/* Lock out the CR Thread */
|
||
|
OPAL_CR_LOCK();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void opal_cr_thread_exit_library(void)
|
||
|
{
|
||
|
if( !opal_cr_thread_use_if_avail ) {
|
||
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
||
|
} else {
|
||
|
/* Allow CR Thread to continue */
|
||
|
OPAL_CR_UNLOCK();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void opal_cr_thread_noop_progress(void)
|
||
|
{
|
||
|
if( !opal_cr_thread_use_if_avail ) {
|
||
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
||
|
|
||
|
/*
 * Return the current time in seconds as a double.
 *
 * Uses the native OPAL microsecond timer when OPAL_TIMER_USEC_NATIVE is set,
 * and gettimeofday() otherwise.
 */
static double opal_cr_get_time() {
#if OPAL_TIMER_USEC_NATIVE
    return (double)opal_timer_base_get_usec() / 1000000.0;
#else
    struct timeval now;
    double seconds;

    gettimeofday(&now, NULL);
    /* Whole seconds first, then the microsecond fraction. */
    seconds = now.tv_sec;
    seconds += (double)now.tv_usec / 1000000.0;

    return seconds;
#endif
}
|
||
|
|
||
|
void opal_cr_set_time(int idx)
|
||
|
{
|
||
|
if(idx < OPAL_CR_TIMER_MAX ) {
|
||
|
if( timer_start[idx] <= 0.0 ) {
|
||
|
timer_start[idx] = opal_cr_get_time();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void opal_cr_clear_timers(void)
|
||
|
{
|
||
|
int i;
|
||
|
for(i = 0; i < OPAL_CR_TIMER_MAX; ++i) {
|
||
|
timer_start[i] = 0.0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void display_indv_timer_core(double diff, char *str) {
|
||
|
double total = 0;
|
||
|
double perc = 0;
|
||
|
|
||
|
total = timer_start[OPAL_CR_TIMER_MAX-1] - timer_start[OPAL_CR_TIMER_ENTRY0];
|
||
|
perc = (diff/total) * 100;
|
||
|
|
||
|
opal_output(0,
|
||
|
"opal_cr: timing: %-20s = %10.2f s\t%10.2f s\t%6.2f\n",
|
||
|
str,
|
||
|
diff,
|
||
|
total,
|
||
|
perc);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
void opal_cr_display_all_timers(void)
|
||
|
{
|
||
|
double diff = 0.0;
|
||
|
char * label = NULL;
|
||
|
|
||
|
if( opal_cr_timing_target_rank != opal_cr_timing_my_rank ) {
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
opal_output(0, "OPAL CR Timing: ******************** Summary Begin\n");
|
||
|
|
||
|
/********** Entry into the system **********/
|
||
|
label = strdup("Start Entry Point");
|
||
|
if( opal_cr_timing_barrier_enabled ) {
|
||
|
diff = timer_start[OPAL_CR_TIMER_CRCPBR0] - timer_start[OPAL_CR_TIMER_ENTRY0];
|
||
|
} else {
|
||
|
diff = timer_start[OPAL_CR_TIMER_CRCP0] - timer_start[OPAL_CR_TIMER_ENTRY0];
|
||
|
}
|
||
|
display_indv_timer_core(diff, label);
|
||
|
free(label);
|
||
|
|
||
|
/********** CRCP Protocol **********/
|
||
|
label = strdup("CRCP Protocol");
|
||
|
if( opal_cr_timing_barrier_enabled ) {
|
||
|
diff = timer_start[OPAL_CR_TIMER_CRCPBR1] - timer_start[OPAL_CR_TIMER_CRCP0];
|
||
|
} else {
|
||
|
diff = timer_start[OPAL_CR_TIMER_P2P0] - timer_start[OPAL_CR_TIMER_CRCP0];
|
||
|
}
|
||
|
display_indv_timer_core(diff, label);
|
||
|
free(label);
|
||
|
|
||
|
/********** P2P Suspend **********/
|
||
|
label = strdup("P2P Suspend");
|
||
|
if( opal_cr_timing_barrier_enabled ) {
|
||
|
diff = timer_start[OPAL_CR_TIMER_P2PBR0] - timer_start[OPAL_CR_TIMER_P2P0];
|
||
|
} else {
|
||
|
diff = timer_start[OPAL_CR_TIMER_CORE0] - timer_start[OPAL_CR_TIMER_P2P0];
|
||
|
}
|
||
|
display_indv_timer_core(diff, label);
|
||
|
free(label);
|
||
|
|
||
|
/********** Checkpoint to Disk **********/
|
||
|
label = strdup("Checkpoint");
|
||
|
diff = timer_start[OPAL_CR_TIMER_CORE1] - timer_start[OPAL_CR_TIMER_CORE0];
|
||
|
display_indv_timer_core(diff, label);
|
||
|
free(label);
|
||
|
|
||
|
/********** P2P Reactivation **********/
|
||
|
label = strdup("P2P Reactivation");
|
||
|
if( opal_cr_timing_barrier_enabled ) {
|
||
|
diff = timer_start[OPAL_CR_TIMER_P2PBR2] - timer_start[OPAL_CR_TIMER_CORE1];
|
||
|
} else {
|
||
|
diff = timer_start[OPAL_CR_TIMER_CRCP1] - timer_start[OPAL_CR_TIMER_CORE1];
|
||
|
}
|
||
|
display_indv_timer_core(diff, label);
|
||
|
free(label);
|
||
|
|
||
|
/********** CRCP Protocol Finalize **********/
|
||
|
label = strdup("CRCP Cleanup");
|
||
|
if( opal_cr_timing_barrier_enabled ) {
|
||
|
diff = timer_start[OPAL_CR_TIMER_COREBR1] - timer_start[OPAL_CR_TIMER_CRCP1];
|
||
|
} else {
|
||
|
diff = timer_start[OPAL_CR_TIMER_CORE2] - timer_start[OPAL_CR_TIMER_CRCP1];
|
||
|
}
|
||
|
display_indv_timer_core(diff, label);
|
||
|
free(label);
|
||
|
|
||
|
/********** Exit the system **********/
|
||
|
label = strdup("Finish Entry Point");
|
||
|
diff = timer_start[OPAL_CR_TIMER_ENTRY4] - timer_start[OPAL_CR_TIMER_CORE2];
|
||
|
display_indv_timer_core(diff, label);
|
||
|
free(label);
|
||
|
|
||
|
opal_output(0, "OPAL CR Timing: ******************** Summary End\n");
|
||
|
}
|
||
|
|
||
|
#if OPAL_ENABLE_CRDEBUG == 1
|
||
|
int opal_cr_debug_set_current_ckpt_thread_self(void)
|
||
|
{
|
||
|
int t;
|
||
|
|
||
|
if( NULL == opal_cr_debug_free_threads ) {
|
||
|
opal_cr_debug_num_free_threads = 3;
|
||
|
opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads );
|
||
|
for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) {
|
||
|
opal_cr_debug_free_threads[t] = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
opal_cr_debug_free_threads[0] = opal_thread_get_self();
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
int opal_cr_debug_clear_current_ckpt_thread(void)
|
||
|
{
|
||
|
opal_cr_debug_free_threads[0] = NULL;
|
||
|
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
int MPIR_checkpoint_debugger_detach(void) {
|
||
|
/* This function is meant to be a noop function for checkpoint/restart
|
||
|
* enabled debugging functionality */
|
||
|
#if 0
|
||
|
/* Once the debugger can successfully force threads into the function below,
|
||
|
* then we can uncomment this line */
|
||
|
if( MPIR_debug_with_checkpoint ) {
|
||
|
opal_cr_debug_threads_already_waiting = true;
|
||
|
}
|
||
|
#endif
|
||
|
return OPAL_SUCCESS;
|
||
|
}
|
||
|
|
||
|
void MPIR_checkpoint_debugger_signal_handler(int signo)
|
||
|
{
|
||
|
opal_output_verbose(1, opal_cr_output,
|
||
|
"crs: MPIR_checkpoint_debugger_signal_handler(): Enter Debug signal handler...");
|
||
|
|
||
|
MPIR_checkpoint_debugger_waitpoint();
|
||
|
|
||
|
opal_output_verbose(1, opal_cr_output,
|
||
|
"crs: MPIR_checkpoint_debugger_signal_handler(): Leave Debug signal handler...");
|
||
|
}
|
||
|
|
||
|
void *MPIR_checkpoint_debugger_waitpoint(void)
|
||
|
{
|
||
|
int t;
|
||
|
opal_thread_t *thr = NULL;
|
||
|
|
||
|
thr = opal_thread_get_self();
|
||
|
|
||
|
/*
|
||
|
* Sanity check, if the debugger is not going to attach, then do not wait
|
||
|
* Make sure to open the debug gate, so that threads can get out
|
||
|
*/
|
||
|
if( !MPIR_debug_with_checkpoint ) {
|
||
|
opal_output_verbose(1, opal_cr_output,
|
||
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Debugger is not attaching... (%d)",
|
||
|
(int)thr->t_handle);
|
||
|
MPIR_checkpoint_debug_gate = 1;
|
||
|
return NULL;
|
||
|
}
|
||
|
else {
|
||
|
opal_output_verbose(1, opal_cr_output,
|
||
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Waiting for the Debugger to attach... (%d)",
|
||
|
(int)thr->t_handle);
|
||
|
MPIR_checkpoint_debug_gate = 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Let special threads escape without waiting, they will wait later
|
||
|
*/
|
||
|
for(t = 0; t < opal_cr_debug_num_free_threads; ++t) {
|
||
|
if( opal_cr_debug_free_threads[t] != NULL &&
|
||
|
opal_thread_self_compare(opal_cr_debug_free_threads[t]) ) {
|
||
|
opal_output_verbose(1, opal_cr_output,
|
||
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Checkpointing thread does not wait here... (%d)",
|
||
|
(int)thr->t_handle);
|
||
|
return NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Force all other threads into the waiting function,
|
||
|
* unless they are already in there, then just return so we do not nest
|
||
|
* calls into this wait function and potentially confuse the debugger.
|
||
|
*/
|
||
|
if( opal_cr_debug_threads_already_waiting ) {
|
||
|
opal_output_verbose(1, opal_cr_output,
|
||
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Threads are already waiting from debugger detach, do not wait here... (%d)",
|
||
|
(int)thr->t_handle);
|
||
|
return NULL;
|
||
|
} else {
|
||
|
opal_output_verbose(1, opal_cr_output,
|
||
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Wait... (%d)",
|
||
|
(int)thr->t_handle);
|
||
|
return MPIR_checkpoint_debugger_breakpoint();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* A tight loop to wait for debugger to release this process from the
|
||
|
* breakpoint.
|
||
|
*/
|
||
|
void *MPIR_checkpoint_debugger_breakpoint(void)
|
||
|
{
|
||
|
/* spin until debugger attaches and releases us */
|
||
|
while (MPIR_checkpoint_debug_gate == 0) {
|
||
|
#if defined(HAVE_USLEEP)
|
||
|
usleep(100000); /* microseconds */
|
||
|
#else
|
||
|
sleep(1); /* seconds */
|
||
|
#endif
|
||
|
}
|
||
|
opal_cr_debug_threads_already_waiting = false;
|
||
|
return NULL;
|
||
|
}
|
||
|
#endif
|