From 7e36edd80f23a5d34ce7b5b0fbe8ecfea061a826 Mon Sep 17 00:00:00 2001
From: Mike Kasick <mike@kasick.org>
Date: Mon, 6 Feb 2012 10:32:13 -0500
Subject: [PATCH 1/2] init: Safely restart services to avoid race conditions.

Previously, service restarts (either an explicit "restart", or a "stop,
start" pair) exhibited a race condition whereby the new (restarting)
service process was often spawned before the old (stopping) process had
terminated.  This may have resulted in the new service process failing to
acquire a limited resource (file lock, socket bind, etc.) that the old
process had not yet released.

Now, a stopping service remains in the SVC_RUNNING state until its exiting
process has been reaped by waitpid.  This prevents a "stop, start" sequence
from spawning a second service process before resources held by the first
are released.  This enables safe service restarts by stopping the service,
waiting for the old service process to terminate, and (only then) starting
the new service process.  In the event of "restarting" an already stopped
service, the previous behavior is maintained whereby the service is simply
started.

This scenario could be special-cased by the restart command, however, we
have observed instances where services are, unintentionally, stopped and
started "too quickly," and so simultaneous processes for the same service
should never be allowed.

Note that this commit alters the behaviors for explicit restarts of
critical and oneshot services.  Previously these serivces would simply be
restarted, whereas now, an explicit restart of a critical service counts as
a crash (which may result in a recovery reboot) and oneshot services go
into the disabled state.
---
 init/init.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/init/init.c b/init/init.c
index 1ee88a7c7..e0f2cf79c 100755
--- a/init/init.c
+++ b/init/init.c
@@ -369,10 +369,9 @@ void service_start(struct service *svc, const char *dynamic_args)
 /* The how field should be either SVC_DISABLED or SVC_RESET */
 static void service_stop_or_reset(struct service *svc, int how)
 {
-        /* we are no longer running, nor should we
-         * attempt to restart
-         */
-    svc->flags &= (~(SVC_RUNNING|SVC_RESTARTING));
+    /* The service is still SVC_RUNNING until its process exits, but if it has
+     * already exited it shoudn't attempt a restart yet. */
+    svc->flags &= (~SVC_RESTARTING);
 
     if ((how != SVC_DISABLED) && (how != SVC_RESET)) {
         /* Hrm, an illegal flag.  Default to SVC_DISABLED */

From b54f39fdd97c50e0e8dfa439722be0786f5e6f52 Mon Sep 17 00:00:00 2001
From: Mike Kasick <mike@kasick.org>
Date: Wed, 25 Jan 2012 23:48:46 -0500
Subject: [PATCH 2/2] init: Retain traditional restart behavior for critical
 and oneshot services.

Adds an SVC_RESTART state that's used for an explicit "restart" of a
running service.  This retains the traditional restart behavior for
critical and oneshot services (previously altered by 7e36edd8), whereby
these services are "simply restarted" instead of counting as a crash (for a
critical serivce) or going into the disabled state (for a oneshot service).
---
 init/builtins.c       |  3 +--
 init/init.c           | 31 ++++++++++++++++++++++++++-----
 init/init.h           |  2 ++
 init/signal_handler.c | 10 ++++++----
 4 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/init/builtins.c b/init/builtins.c
index 9aa2345e3..ad73e90b2 100644
--- a/init/builtins.c
+++ b/init/builtins.c
@@ -523,8 +523,7 @@ int do_restart(int nargs, char **args)
     struct service *svc;
     svc = service_find_by_name(args[1]);
     if (svc) {
-        service_stop(svc);
-        service_start(svc, NULL);
+        service_restart(svc);
     }
     return 0;
 }
diff --git a/init/init.c b/init/init.c
index e0f2cf79c..26adcc655 100755
--- a/init/init.c
+++ b/init/init.c
@@ -169,7 +169,7 @@ void service_start(struct service *svc, const char *dynamic_args)
          * state and immediately takes it out of the restarting
          * state if it was in there
          */
-    svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET));
+    svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART));
     svc->time_started = 0;
 
         /* running processes require no additional work -- if
@@ -366,14 +366,14 @@ void service_start(struct service *svc, const char *dynamic_args)
         notify_service_state(svc->name, "running");
 }
 
-/* The how field should be either SVC_DISABLED or SVC_RESET */
+/* The how field should be either SVC_DISABLED, SVC_RESET, or SVC_RESTART */
 static void service_stop_or_reset(struct service *svc, int how)
 {
     /* The service is still SVC_RUNNING until its process exits, but if it has
      * already exited it shoudn't attempt a restart yet. */
     svc->flags &= (~SVC_RESTARTING);
 
-    if ((how != SVC_DISABLED) && (how != SVC_RESET)) {
+    if ((how != SVC_DISABLED) && (how != SVC_RESET) && (how != SVC_RESTART)) {
         /* Hrm, an illegal flag.  Default to SVC_DISABLED */
         how = SVC_DISABLED;
     }
@@ -405,6 +405,17 @@ void service_stop(struct service *svc)
     service_stop_or_reset(svc, SVC_DISABLED);
 }
 
+void service_restart(struct service *svc)
+{
+    if (svc->flags & SVC_RUNNING) {
+        /* Stop, wait, then start the service. */
+        service_stop_or_reset(svc, SVC_RESTART);
+    } else if (!(svc->flags & SVC_RESTARTING)) {
+        /* Just start the service since it's not running. */
+        service_start(svc, NULL);
+    } /* else: Service is restarting anyways. */
+}
+
 void property_changed(const char *name, const char *value)
 {
     if (property_triggers_enabled)
@@ -471,6 +482,17 @@ static void msg_stop(const char *name)
     }
 }
 
+static void msg_restart(const char *name)
+{
+    struct service *svc = service_find_by_name(name);
+
+    if (svc) {
+        service_restart(svc);
+    } else {
+        ERROR("no such service '%s'\n", name);
+    }
+}
+
 void handle_control_message(const char *msg, const char *arg)
 {
     if (!strcmp(msg,"start")) {
@@ -478,8 +500,7 @@ void handle_control_message(const char *msg, const char *arg)
     } else if (!strcmp(msg,"stop")) {
         msg_stop(arg);
     } else if (!strcmp(msg,"restart")) {
-        msg_stop(arg);
-        msg_start(arg);
+        msg_restart(arg);
     } else {
         ERROR("unknown control msg '%s'\n", msg);
     }
diff --git a/init/init.h b/init/init.h
index 58bbbfe9b..c20864ad3 100644
--- a/init/init.h
+++ b/init/init.h
@@ -72,6 +72,7 @@ struct svcenvinfo {
 #define SVC_RESET       0x40  /* Use when stopping a process, but not disabling
                                  so it can be restarted with its class */
 #define SVC_RC_DISABLED 0x80  /* Remember if the disabled flag was set in the rc script */
+#define SVC_RESTART     0x100 /* Use to safely restart (stop, wait, start) a service */
 
 #define NR_SVC_SUPP_GIDS 12    /* twelve supplementary groups */
 
@@ -129,6 +130,7 @@ void service_for_each_flags(unsigned matchflags,
                             void (*func)(struct service *svc));
 void service_stop(struct service *svc);
 void service_reset(struct service *svc);
+void service_restart(struct service *svc);
 void service_start(struct service *svc, const char *dynamic_args);
 void property_changed(const char *name, const char *value);
 
diff --git a/init/signal_handler.c b/init/signal_handler.c
index b17013256..672904d4b 100644
--- a/init/signal_handler.c
+++ b/init/signal_handler.c
@@ -63,7 +63,7 @@ static int wait_for_one_process(int block)
 
     NOTICE("process '%s', pid %d exited\n", svc->name, pid);
 
-    if (!(svc->flags & SVC_ONESHOT)) {
+    if (!(svc->flags & SVC_ONESHOT) || (svc->flags & SVC_RESTART)) {
         kill(-pid, SIGKILL);
         NOTICE("process '%s' killing any children in process group\n", svc->name);
     }
@@ -78,8 +78,9 @@ static int wait_for_one_process(int block)
     svc->pid = 0;
     svc->flags &= (~SVC_RUNNING);
 
-        /* oneshot processes go into the disabled state on exit */
-    if (svc->flags & SVC_ONESHOT) {
+        /* oneshot processes go into the disabled state on exit,
+         * except when manually restarted. */
+    if ((svc->flags & SVC_ONESHOT) && !(svc->flags & SVC_RESTART)) {
         svc->flags |= SVC_DISABLED;
     }
 
@@ -90,7 +91,7 @@ static int wait_for_one_process(int block)
     }
 
     now = gettime();
-    if (svc->flags & SVC_CRITICAL) {
+    if ((svc->flags & SVC_CRITICAL) && !(svc->flags & SVC_RESTART)) {
         if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) {
             if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) {
                 ERROR("critical process '%s' exited %d times in %d minutes; "
@@ -105,6 +106,7 @@ static int wait_for_one_process(int block)
         }
     }
 
+    svc->flags &= (~SVC_RESTART);
     svc->flags |= SVC_RESTARTING;
 
     /* Execute all onrestart commands for this service. */