From 79d3603bf15b8f815136178be8c8a236734a7596 Mon Sep 17 00:00:00 2001
When a process is forked, and before its GID is changed in "exec-command",
it will share the parent GID, which is 0 for Shepherd. In that case, use
the PID instead of the PGID.
Also make sure to reset the SIGTERM handler before forking a process. Failing
to do so, will result in stopping Shepherd if a SIGTERM is received between
fork and execv calls. Restore the SIGTERM handler once the process has been
forked.
* modules/shepherd/service.scm (fork+exec-command): Save the current SIGTERM
handler and reset it before forking. Then, restore it on the parent after
forking.
(make-kill-destructor): Handle the case when PGID is zero, between the process
fork and exec.
---
modules/shepherd/service.scm | 33 ++++++++++++++++++++++++++-------
tests/forking-service.sh | 18 ++++++++++++++++++
2 files changed, 44 insertions(+), 7 deletions(-)
Toggle diff (100 lines)
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index 8604d2f..45fcf32 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -5,6 +5,7 @@
;; Copyright (C) 2016 Alex Kost <alezost@gmail.com>
;; Copyright (C) 2018 Carlo Zancanaro <carlo@zancanaro.id.au>
;; Copyright (C) 2019 Ricardo Wurmus <rekado@elephly.net>
+;; Copyright (C) 2020 Mathieu Othacehe <m.othacehe@gmail.com>
;;
;; This file is part of the GNU Shepherd.
;;
@@ -884,7 +885,18 @@ its PID."
(unless %sigchld-handler-installed?
(sigaction SIGCHLD handle-SIGCHLD SA_NOCLDSTOP)
(set! %sigchld-handler-installed? #t))
- (let ((pid (primitive-fork)))
+
+ ;; When forking a process, the signal handlers are inherited, until it
+ ;; forks. If SIGTERM is received by the forked process, before it calls
+ ;; execv, the installed SIGTERM handler, stopping Shepherd will be called.
+ ;; To avoid this, save the SIGTERM handler, disable it, and restore it once,
+ ;; the process has been forked. This way, the forked process will use the
+ ;; default SIGTERM handler stopping the process.
+ (let ((term-handler (match (sigaction SIGTERM)
+ ((proc . _)
+ proc)))
+ (pid (and (sigaction SIGTERM SIG_DFL)
+ (primitive-fork))))
(if (zero? pid)
(exec-command command
#:user user
@@ -893,7 +905,10 @@ its PID."
#:directory directory
#:file-creation-mask file-creation-mask
#:environment-variables environment-variables)
- pid)))
+ (begin
+ ;; Restore the initial SIGTERM handler.
+ (sigaction SIGTERM term-handler)
+ pid))))
(define* (make-forkexec-constructor command
#:key
@@ -957,11 +972,15 @@ start."
"Return a procedure that sends SIGNAL to the process group of the PID given
as argument, where SIGNAL defaults to `SIGTERM'."
(lambda (pid . args)
- ;; Kill the whole process group PID belongs to. Don't assume that PID
- ;; is a process group ID: that's not the case when using #:pid-file,
- ;; where the process group ID is the PID of the process that
- ;; "daemonized".
- (kill (- (getpgid pid)) signal)
+ ;; Kill the whole process group PID belongs to. Don't assume that PID is
+ ;; a process group ID: that's not the case when using #:pid-file, where
+ ;; the process group ID is the PID of the process that "daemonized". If
+ ;; this procedure is called, between the process fork and exec, the PGID
+ ;; will still be zero (the Shepherd PGID). In that case, use the PID.
+ (let ((pgid (getpgid pid)))
+ (if (= (getpgid 0) pgid)
+ (kill pid signal) ;don't kill ourself
+ (kill (- pgid) signal)))
#f))
;; Produce a constructor that executes a command.
diff --git a/tests/forking-service.sh b/tests/forking-service.sh
index 7126c3d..bd9aac9 100644
--- a/tests/forking-service.sh
+++ b/tests/forking-service.sh
@@ -71,6 +71,17 @@ cat > "$conf"<<EOF
#:pid-file "$PWD/$service2_pid")
#:stop (make-kill-destructor)
#:respawn? #t))
+
+(define %command3
+ '("$SHELL" "-c" "sleep 600"))
+
+(register-services
+ (make <service>
+ ;; A service that forks into a different process.
+ #:provides '(test3)
+ #:start (make-forkexec-constructor %command3)
+ #:stop (make-kill-destructor)
+ #:respawn? #t))
EOF
cat $conf
@@ -113,3 +124,10 @@ sleep 1;
$herd status test2 | grep started
test "`cat $PWD/$service2_started`" = "started
started"
+
+# Try to trigger eventual race conditions, when killing a process between fork
+# and execv calls.
+for i in `seq 1 50`
+do
+ $herd restart test3
+done
--
2.26.2