Reported by Ricardo Wurmus <rekado@elephly.net>.
With this change, the manifest file created for:
guix install r r-seurat r-cistopic r-monocle3 r-cicero-monocle3 r-assertthat
goes from 5.6M to 192K. Likewise, on this profile, wall-clock time of:
GUIX_PROFILING=gc guix package -I
goes from 0.7s to 0.1s, with heap usage going from 55M to 9M.
* guix/profiles.scm (manifest->gexp)[entry->gexp]: Turn into a monadic
procedure. Return a 'repeated' sexp if ENTRY was already visited
before.
Adjust caller accordingly. Bump manifest version.
(sexp->manifest)[sexp->manifest-entry]: Turn into a monadic procedure.
Add case for 'repeated' nodes. Add each entry to the current state
vhash.
Add clause for version 4 manifests.
* tests/profiles.scm ("deduplication of repeated entries"): New test.
* guix/build/profiles.scm (manifest-sexp->inputs+search-paths): Expect
version 4. Add clause for 'repeated' nodes.
---
guix/build/profiles.scm | 4 +-
guix/profiles.scm | 127 ++++++++++++++++++++++++++++------------
tests/profiles.scm | 42 +++++++++++++
3 files changed, 134 insertions(+), 39 deletions(-)
Toggle diff (236 lines)
diff --git a/guix/build/profiles.scm b/guix/build/profiles.scm
index f9875ca92e..c4460f624b 100644
--- a/guix/build/profiles.scm
+++ b/guix/build/profiles.scm
@@ -150,7 +150,7 @@ (define (manifest-sexp->inputs+search-paths manifest)
values: the list of store items of its manifest entries, and the list of
search path specifications."
(match manifest ;this must match 'manifest->gexp'
- (('manifest ('version 3)
+ (('manifest ('version 4)
('packages (entries ...)))
(let loop ((entries entries)
(inputs '())
@@ -162,6 +162,8 @@ (define (manifest-sexp->inputs+search-paths manifest)
(loop (append rest deps) ;breadth-first traversal
(cons item inputs)
(append paths search-paths)))
+ ((('repeated name version item) . rest)
+ (loop rest inputs search-paths))
(()
(values (reverse inputs)
(delete-duplicates
diff --git a/guix/profiles.scm b/guix/profiles.scm
index bf50c00a1e..44ff37e75b 100644
--- a/guix/profiles.scm
+++ b/guix/profiles.scm
@@ -455,31 +455,53 @@ (define (inferior->entry)
(define (manifest->gexp manifest)
"Return a representation of MANIFEST as a gexp."
(define (entry->gexp entry)
- (match entry
- (($ <manifest-entry> name version output (? string? path)
- (deps ...) (search-paths ...) _ (properties ...))
- #~(#$name #$version #$output #$path
- (propagated-inputs #$(map entry->gexp deps))
- (search-paths #$(map search-path-specification->sexp
- search-paths))
- #$@(if (null? properties)
- #~()
- #~((properties . #$properties)))))
- (($ <manifest-entry> name version output package
- (deps ...) (search-paths ...) _ (properties ...))
- #~(#$name #$version #$output
- (ungexp package (or output "out"))
- (propagated-inputs #$(map entry->gexp deps))
- (search-paths #$(map search-path-specification->sexp
- search-paths))
- #$@(if (null? properties)
- #~()
- #~((properties . #$properties)))))))
+ ;; Maintain in state monad a vhash of visited entries, indexed by their
+ ;; item, usually package objects (we cannot use the entry itself as an
+ ;; index since identical entries are usually not 'eq?'). Use that vhash
+ ;; to avoid repeating duplicate entries. This is particularly useful in
+ ;; the presence of propagated inputs, where we could otherwise end up
+ ;; repeating large trees.
+ (mlet %state-monad ((visited (current-state)))
+ (if (match (vhash-assq (manifest-entry-item entry) visited)
+ ((_ . previous-entry)
+ (manifest-entry=? previous-entry entry))
+ (#f #f))
+ (return #~(repeated #$(manifest-entry-name entry)
+ #$(manifest-entry-version entry)
+ #$(manifest-entry-item entry)))
+ (mbegin %state-monad
+ (set-current-state (vhash-consq (manifest-entry-item entry)
+ entry visited))
+ (mlet %state-monad ((deps (mapm %state-monad entry->gexp
+ (manifest-entry-dependencies entry))))
+ (return
+ (match entry
+ (($ <manifest-entry> name version output (? string? path)
+ (_deps ...) (search-paths ...) _ (properties ...))
+ #~(#$name #$version #$output #$path
+ (propagated-inputs #$deps)
+ (search-paths #$(map search-path-specification->sexp
+ search-paths))
+ #$@(if (null? properties)
+ #~()
+ #~((properties . #$properties)))))
+ (($ <manifest-entry> name version output package
+ (_deps ...) (search-paths ...) _ (properties ...))
+ #~(#$name #$version #$output
+ (ungexp package (or output "out"))
+ (propagated-inputs #$deps)
+ (search-paths #$(map search-path-specification->sexp
+ search-paths))
+ #$@(if (null? properties)
+ #~()
+ #~((properties . #$properties))))))))))))
(match manifest
(($ <manifest> (entries ...))
- #~(manifest (version 3)
- (packages #$(map entry->gexp entries))))))
+ #~(manifest (version 4)
+ (packages #$(run-with-state
+ (mapm %state-monad entry->gexp entries)
+ vlist-null))))))
(define (find-package name version)
"Return a package from the distro matching NAME and possibly VERSION. This
@@ -522,25 +544,44 @@ (define (infer-dependency item parent)
(define* (sexp->manifest-entry sexp #:optional (parent (delay #f)))
(match sexp
+ (('repeated name version path)
+ ;; This entry is the same as another one encountered earlier; look it
+ ;; up and return it.
+ (mlet %state-monad ((visited (current-state))
+ (key -> (list name version path)))
+ (match (vhash-assoc key visited)
+ (#f
+ (raise (formatted-message
+ (G_ "invalid repeated entry in profile: ~s")
+ sexp)))
+ ((_ . entry)
+ (return entry)))))
((name version output path
('propagated-inputs deps)
('search-paths search-paths)
extra-stuff ...)
- ;; For each of DEPS, keep a promise pointing to ENTRY.
- (letrec* ((deps* (map (cut sexp->manifest-entry <> (delay entry))
- deps))
- (entry (manifest-entry
- (name name)
- (version version)
- (output output)
- (item path)
- (dependencies deps*)
- (search-paths (map sexp->search-path-specification
- search-paths))
- (parent parent)
- (properties (or (assoc-ref extra-stuff 'properties)
- '())))))
- entry))))
+ (mlet* %state-monad
+ ((entry -> #f)
+ (deps* (mapm %state-monad
+ (cut sexp->manifest-entry <> (delay entry))
+ deps))
+ (visited (current-state))
+ (key -> (list name version path)))
+ (set! entry ;XXX: emulate 'letrec*'
+ (manifest-entry
+ (name name)
+ (version version)
+ (output output)
+ (item path)
+ (dependencies deps*)
+ (search-paths (map sexp->search-path-specification
+ search-paths))
+ (parent parent)
+ (properties (or (assoc-ref extra-stuff 'properties)
+ '()))))
+ (mbegin %state-monad
+ (set-current-state (vhash-cons key entry visited))
+ (return entry))))))
(match sexp
(('manifest ('version 0)
@@ -608,7 +649,17 @@ (define* (sexp->manifest-entry sexp #:optional (parent (delay #f)))
;; Version 3 represents DEPS as full-blown manifest entries.
(('manifest ('version 3 minor-version ...)
('packages (entries ...)))
- (manifest (map sexp->manifest-entry entries)))
+ (manifest (run-with-state
+ (mapm %state-monad sexp->manifest-entry entries)
+ vlist-null)))
+
+ ;; Version 4 deduplicates repeated entries, as can happen with deep
+ ;; propagated input trees.
+ (('manifest ('version 4 minor-version ...)
+ ('packages (entries ...)))
+ (manifest (run-with-state
+ (mapm %state-monad sexp->manifest-entry entries)
+ vlist-null)))
(_
(raise (condition
(&message (message "unsupported manifest format")))))))
diff --git a/tests/profiles.scm b/tests/profiles.scm
index 7e51d37ab9..3838d971c9 100644
--- a/tests/profiles.scm
+++ b/tests/profiles.scm
@@ -586,6 +586,48 @@ (define (entry->sexp entry)
#:locales? #f)))
(return #f)))))
+(test-assertm "deduplication of repeated entries"
+ ;; Make sure the 'manifest' file does not duplicate identical entries.
+ ;; See <https://issues.guix.gnu.org/55499>.
+ (mlet* %store-monad ((p0 -> (dummy-package "p0"
+ (build-system trivial-build-system)
+ (arguments
+ `(#:guile ,%bootstrap-guile
+ #:builder (mkdir (assoc-ref %outputs "out"))))
+ (propagated-inputs
+ `(("guile" ,%bootstrap-guile)))))
+ (p1 -> (package
+ (inherit p0)
+ (name "p1")))
+ (drv (profile-derivation (packages->manifest
+ (list p0 p1))
+ #:hooks '()
+ #:locales? #f)))
+ (mbegin %store-monad
+ (built-derivations (list drv))
+ (let ((file (string-append (derivation->output-path drv)
+ "/manifest"))
+ (manifest (profile-manifest (derivation->output-path drv))))
+ (define (contains-repeated? sexp)
+ (match sexp
+ (('repeated _ ...) #t)
+ ((lst ...) (any contains-repeated? sexp))
+ (_ #f)))
+
+ (return (and (contains-repeated? (call-with-input-file file read))
+
+ ;; MANIFEST has two entries for %BOOTSTRAP-GUILE since
+ ;; it's propagated both from P0 and from P1. When
+ ;; reading a 'repeated' node, 'read-manifest' should
+ ;; reuse the previously-read entry so the two
+ ;; %BOOTSTRAP-GUILE entries must be 'eq?'.
+ (match (manifest-entries manifest)
+ (((= manifest-entry-dependencies (dep0))
+ (= manifest-entry-dependencies (dep1)))
+ (and (string=? (manifest-entry-name dep0)
+ (package-name %bootstrap-guile))
+ (eq? dep0 dep1))))))))))
+
(test-assertm "no collision"
;; Here we have an entry that is "lowered" (its 'item' field is a store file
;; name) and another entry (its 'item' field is a package) that is
--
2.36.1