This eliminates possible race conditions related to signal delivery and
'select', which in turn means we can pass 'select' an infinite timeout
without any risk of missing a signal. It also allows us to structure
the code around a single event loop.
* modules/shepherd.scm (maybe-signal-port): New procedure.
(main): Use it and define 'signal-port'. Add 'ports' argument to
'next-command'. Pass it to 'select'. Change the timeout argument of
'select' to #f when SIGNAL-PORT is true.
* modules/shepherd/service.scm (fork+exec-command): Call
'unblock-signals' in the child process, right before 'exec-command'.
---
modules/shepherd.scm | 73 ++++++++++++++++++++++++++++++++----
modules/shepherd/service.scm | 19 ++++++----
2 files changed, 78 insertions(+), 14 deletions(-)
Toggle diff (154 lines)
diff --git a/modules/shepherd.scm b/modules/shepherd.scm
index 46faab6..ac60070 100644
--- a/modules/shepherd.scm
+++ b/modules/shepherd.scm
@@ -24,6 +24,7 @@
#:use-module (ice-9 format)
#:use-module (ice-9 rdelim) ;; Line-based I/O.
#:autoload (ice-9 readline) (activate-readline) ;for interactive use
+ #:use-module ((ice-9 threads) #:select (all-threads))
#:use-module (oop goops) ;; Defining classes and methods.
#:use-module (srfi srfi-1) ;; List library.
#:use-module (srfi srfi-26)
@@ -60,6 +61,53 @@ socket file at FILE-NAME upon exit of PROC. Return the values of PROC."
(close sock)
(catch-system-error (delete-file file-name))))))
+(define (maybe-signal-port signals)
+ "Return a signal port for SIGNALS, using 'signalfd' on GNU/Linux, or #f if
+that is not supported."
+ (catch 'system-error
+ (lambda ()
+ (let ((port (signalfd -1 signals)))
+ ;; As per the signalfd(2) man page, block SIGNALS. The tricky bit is
+ ;; that SIGNALS must be blocked for all the threads; new threads will
+ ;; inherit the signal mask, but we must ensure that neither Guile's
+ ;; signal delivery thread nor its finalization thread are already
+ ;; running, because if they do, they are not blocking SIGNALS. The
+ ;; signal delivery thread is started on the first call to 'sigaction'
+ ;; so we arrange to not call 'sigaction' beforehand; as for the
+ ;; finalization thread, use 'without-automatic-finalization' to
+ ;; temporarily stop it.
+ (without-automatic-finalization
+ (let ((count (length (all-threads))))
+ (if (= 1 count)
+ (begin
+ (block-signals signals)
+ port)
+ (begin
+ (local-output (l10n "warning: \
+already ~a threads running, disabling 'signalfd' support")
+ count)
+ (close-port port)
+ #f))))))
+ (lambda args
+ (if (= ENOSYS (system-error-errno args))
+ #f
+ (apply throw args)))))
+
+(define (handle-signal-port port)
+ "Read from PORT, a signalfd port, and handle the signal accordingly."
+ (let ((signal (consume-signalfd-siginfo port)))
+ (cond ((= signal SIGCHLD)
+ (handle-SIGCHLD))
+ ((= signal SIGINT)
+ (catch 'quit
+ (lambda ()
+ (stop root-service))
+ quit-exception-handler))
+ ((memv signal (list SIGTERM SIGHUP))
+ (stop root-service))
+ (else
+ #f))))
+
;; Main program.
(define (main . args)
@@ -82,6 +130,12 @@ socket file at FILE-NAME upon exit of PROC. Return the values of PROC."
(= EINVAL errno) ;PR_SET_CHILD_SUBREAPER unavailable
(apply throw args)))))))
+ (define signal-port
+ ;; Attempt to create a "signal port" via 'signalfd'. This must be called
+ ;; before the 'sigaction' procedure is called, because 'sigaction' spawns
+ ;; the signal thread.
+ (maybe-signal-port (list SIGCHLD SIGINT SIGTERM SIGHUP)))
+
(initialize-cli)
(let ((config-file #f)
@@ -281,7 +335,9 @@ socket file at FILE-NAME upon exit of PROC. Return the values of PROC."
;; "Failed to autoload handle-SIGCHLD in (ice-9 readline):"
(handle-SIGCHLD)
- (let next-command ()
+ (let next-command ((ports (if signal-port
+ (list signal-port sock)
+ (list sock))))
(define (read-from sock)
(match (accept sock)
((command-source . client-address)
@@ -289,14 +345,17 @@ socket file at FILE-NAME upon exit of PROC. Return the values of PROC."
(process-connection command-source))
(_ #f)))
- ;; XXX: Until we use signalfd(2), there's always a time window
+ ;; When not using signalfd(2), there's always a time window
;; before 'select' during which a handler async can be queued
;; but not executed. Work around it by exiting 'select' every
;; few seconds.
- (match (select (list sock) (list) (list)
- (if poll-services? 0.5 30))
- (((sock) _ _)
- (read-from sock))
+ (match (select ports (list) (list)
+ (and (not signal-port)
+ (if poll-services? 0.5 30)))
+ (((port _ ...) _ _)
+ (if (and signal-port (eq? port signal-port))
+ (handle-signal-port port)
+ (read-from sock)))
(_
;; 'select' returned an empty set, probably due to EINTR.
;; Explicitly call the SIGCHLD handler because we cannot be
@@ -306,7 +365,7 @@ socket file at FILE-NAME upon exit of PROC. Return the values of PROC."
(when poll-services?
(check-for-dead-services))
- (next-command))))))))
+ (next-command ports))))))))
;; Start all of SERVICES, which is a list of canonical names (FIXME?),
;; but in a order where all dependencies are fulfilled before we
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index 45fcf32..a202a98 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -898,13 +898,18 @@ its PID."
(pid (and (sigaction SIGTERM SIG_DFL)
(primitive-fork))))
(if (zero? pid)
- (exec-command command
- #:user user
- #:group group
- #:log-file log-file
- #:directory directory
- #:file-creation-mask file-creation-mask
- #:environment-variables environment-variables)
+ (begin
+ ;; Unblock any signals that might have been blocked by the parent
+ ;; process.
+ (unblock-signals (list SIGCHLD SIGINT SIGHUP SIGTERM))
+
+ (exec-command command
+ #:user user
+ #:group group
+ #:log-file log-file
+ #:directory directory
+ #:file-creation-mask file-creation-mask
+ #:environment-variables environment-variables))
(begin
;; Restore the initial SIGTERM handler.
(sigaction SIGTERM term-handler)
--
2.26.2