Hi, Caleb Ristvedt skribis: > From 43ee61b405b01038b3e7c84aba64521ab8a62236 Mon Sep 17 00:00:00 2001 > From: Caleb Ristvedt > Date: Wed, 6 May 2020 11:52:16 -0500 > Subject: [PATCH 2/2] nar: 'with-temporary-store-file' uses a single connection > > Previously the 'with-store' form was entered every time a different temporary > file was tried. This caused there to be as many simultaneous open connections > as there were attempts, and prevented the (loop ...) call from being a tail > call. This change fixes that. > > * guix/nar.scm (with-temporary-store-file): open connection once prior to > entering the loop. > --- > guix/nar.scm | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/guix/nar.scm b/guix/nar.scm > index f91af72879..404cef8b97 100644 > --- a/guix/nar.scm > +++ b/guix/nar.scm > @@ -126,8 +126,8 @@ held." > (define-syntax-rule (with-temporary-store-file name body ...) > "Evaluate BODY with NAME bound to the file name of a temporary store item > protected from GC." > - (let loop ((name (temporary-store-file))) > - (with-store store > + (with-store store > + (let loop ((name (temporary-store-file))) > ;; Add NAME to the current process' roots. (Opening this connection to > ;; the daemon allows us to reuse its code that deals with the > ;; per-process roots file.) This change had an undesirable effect: the connection would be kept for the body of ‘with-temporary-store-file’, during which we’d call: finalize-store-file -> register-path which accesses the database. At this point, for each ‘guix offload’ process, we’d thus have the database open twice: once for the session’s guix-daemon, and once for that ‘register-path’ call. On berlin, the effect is that we see many ‘guix offload’ processes stalled because the SQLite database is busy: --8<---------------cut here---------------start------------->8--- ludo@berlin ~$ guix processes |grep ^SessionPID|wc -l 104 ludo@berlin ~$ guix processes |recsel -e 'ClientCommand ~ "offload"'|grep ^SessionPID |wc -l 69 ludo@berlin ~$ guix processes |recsel -e 'ClientCommand ~ "offload"'|head SessionPID: 10916 ClientPID: 7408 ClientCommand: /gnu/store/18hp7flyb3yid3yp49i6qcdq0sbi5l1n-guile-3.0.2/bin/guile \ /gnu/store/abiva5ivq99x30r2s9pa3jj0pv9g16sv-guix-1.1.0-4.bdc801e/bin/.guix-real offload x86_64-linux 3600 1 21600 SessionPID: 11333 ClientPID: 9505 ClientCommand: /gnu/store/18hp7flyb3yid3yp49i6qcdq0sbi5l1n-guile-3.0.2/bin/guile \ /gnu/store/abiva5ivq99x30r2s9pa3jj0pv9g16sv-guix-1.1.0-4.bdc801e/bin/.guix-real offload x86_64-linux 3600 1 21600 SessionPID: 16277 ClientPID: 9179 ludo@berlin ~$ sudo strace -p 7408 strace: Process 7408 attached restart_syscall(<... resuming interrupted read ...>) = 0 fcntl(19, F_SETLK, {l_type=F_RDLCK, l_whence=SEEK_SET, l_start=125, l_len=1}) = 0 fcntl(19, F_SETLK, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=120, l_len=1}) = -1 EAGAIN (Resource temporarily unavailable) fcntl(19, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=125, l_len=1}) = 0 clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=100000000}, NULL) = 0 fcntl(19, F_SETLK, {l_type=F_RDLCK, l_whence=SEEK_SET, l_start=125, l_len=1}) = 0 fcntl(19, F_SETLK, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=120, l_len=1}) = -1 EAGAIN (Resource temporarily unavailable) fcntl(19, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=125, l_len=1}) = 0 clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=100000000}, NULL) = 0 fcntl(19, F_SETLK, {l_type=F_RDLCK, l_whence=SEEK_SET, l_start=125, l_len=1}) = 0 fcntl(19, F_SETLK, {l_type=F_WRLCK, l_whence=SEEK_SET, l_start=120, l_len=1}) = -1 EAGAIN (Resource temporarily unavailable) fcntl(19, F_SETLK, {l_type=F_UNLCK, l_whence=SEEK_SET, l_start=125, l_len=1}) = 0 clock_nanosleep(CLOCK_REALTIME, 0, {tv_sec=0, tv_nsec=100000000}, ^Cstrace: Process 7408 detached ludo@berlin ~$ sudo gdb -p 7408 […] (gdb) bt #0 0x00007f2e2aa327a1 in clock_nanosleep@GLIBC_2.2.5 () from target:/gnu/store/fa6wj5bxkj5ll1d7292a70knmyl7a0cr-glibc-2.31/lib/libc.so.6 #1 0x00007f2e2aa37c03 in nanosleep () from target:/gnu/store/fa6wj5bxkj5ll1d7292a70knmyl7a0cr-glibc-2.31/lib/libc.so.6 #2 0x00007f2e2aa611a4 in usleep () from target:/gnu/store/fa6wj5bxkj5ll1d7292a70knmyl7a0cr-glibc-2.31/lib/libc.so.6 #3 0x00007f2e1e8245ea in unixSleep () from target:/gnu/store/807c6g9xqrxdjyhm8wm1r6jjjmc8q4vs-sqlite-3.31.1/lib/libsqlite3.so.0 #4 0x00007f2e1e81f56e in sqliteDefaultBusyCallback () from target:/gnu/store/807c6g9xqrxdjyhm8wm1r6jjjmc8q4vs-sqlite-3.31.1/lib/libsqlite3.so.0 #5 0x00007f2e1e81f5d9 in sqlite3InvokeBusyHandler () from target:/gnu/store/807c6g9xqrxdjyhm8wm1r6jjjmc8q4vs-sqlite-3.31.1/lib/libsqlite3.so.0 #6 0x00007f2e1e877ec1 in sqlite3BtreeBeginTrans () from target:/gnu/store/807c6g9xqrxdjyhm8wm1r6jjjmc8q4vs-sqlite-3.31.1/lib/libsqlite3.so.0 #7 0x00007f2e1e89fc64 in sqlite3VdbeExec () from target:/gnu/store/807c6g9xqrxdjyhm8wm1r6jjjmc8q4vs-sqlite-3.31.1/lib/libsqlite3.so.0 #8 0x00007f2e1e8a6d09 in sqlite3_step () from target:/gnu/store/807c6g9xqrxdjyhm8wm1r6jjjmc8q4vs-sqlite-3.31.1/lib/libsqlite3.so.0 #9 0x00007f2e1e8a7add in sqlite3_exec () from target:/gnu/store/807c6g9xqrxdjyhm8wm1r6jjjmc8q4vs-sqlite-3.31.1/lib/libsqlite3.so.0 #10 0x00007f2e2af0466d in ffi_call_unix64 () from target:/gnu/store/bw15z9kh9c65ycc2vbhl2izwfwfva7p1-libffi-3.3/lib/libffi.so.7 #11 0x00007f2e2af02ac0 in ffi_call_int () from target:/gnu/store/bw15z9kh9c65ycc2vbhl2izwfwfva7p1-libffi-3.3/lib/libffi.so.7 #12 0x00007f2e2aff148e in scm_i_foreign_call () from target:/gnu/store/18hp7flyb3yid3yp49i6qcdq0sbi5l1n-guile-3.0.2/lib/libguile-3.0.so.1 --8<---------------cut here---------------end--------------->8--- They loop pretty much indefinitely on this and nothing (or very little) happens on the system. I’ll revert this patch but I’m happy to hear what you think, Caleb. Another reason to implement temp roots in Scheme, as it would allow us to not open a connection to the daemon from ‘guix offload’! Thanks, Ludo’.