Toggle quote (436 lines)
> It appears this email is making it through, so here it is as an
> attachment:
>
> From 4a3c1fff8460a03bfb7c1aada9863205cd6f22fd Mon Sep 17 00:00:00 2001
> From: Pierre Langlois <pierre.langlois@gmx.com>
> Date: Tue, 29 Mar 2022 20:13:34 +0100
> Subject: [PATCH v7 06/32] build-system: Add tree-sitter-build-system.
>
> * guix/build-system/tree-sitter.scm: New module.
> * guix/build/tree-sitter-build-system.scm: Likewise.
> * Makefile.am (MODULES): Add them.
> * doc/guix.texi: Document it.
> ---
> Makefile.am | 2 +
> doc/guix.texi | 21 ++-
> guix/build-system/tree-sitter.scm | 190 ++++++++++++++++++++++++
> guix/build/tree-sitter-build-system.scm | 153 +++++++++++++++++++
> 4 files changed, 365 insertions(+), 1 deletion(-)
> create mode 100644 guix/build-system/tree-sitter.scm
> create mode 100644 guix/build/tree-sitter-build-system.scm
>
> diff --git a/Makefile.am b/Makefile.am
> index c3af23b68e..a16c4fcd7e 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -178,6 +178,7 @@ MODULES = \
> guix/build-system/ruby.scm \
> guix/build-system/scons.scm \
> guix/build-system/texlive.scm \
> + guix/build-system/tree-sitter.scm \
> guix/build-system/trivial.scm \
> guix/ftp-client.scm \
> guix/http-client.scm \
> @@ -234,6 +235,7 @@ MODULES = \
> guix/build/ruby-build-system.scm \
> guix/build/scons-build-system.scm \
> guix/build/texlive-build-system.scm \
> + guix/build/tree-sitter-build-system.scm \
> guix/build/waf-build-system.scm \
> guix/build/haskell-build-system.scm \
> guix/build/julia-build-system.scm \
> diff --git a/doc/guix.texi b/doc/guix.texi
> index e547d469f4..4e997f7176 100644
> --- a/doc/guix.texi
> +++ b/doc/guix.texi
> @@ -79,7 +79,7 @@ Copyright @copyright{} 2020 Jack Hill@*
> Copyright @copyright{} 2020 Naga Malleswari@*
> Copyright @copyright{} 2020, 2021 Brice Waegeneire@*
> Copyright @copyright{} 2020 R Veera Kumar@*
> -Copyright @copyright{} 2020, 2021 Pierre Langlois@*
> +Copyright @copyright{} 2020, 2021, 2022 Pierre Langlois@*
> Copyright @copyright{} 2020 pinoaffe@*
> Copyright @copyright{} 2020 André Batista@*
> Copyright @copyright{} 2020, 2021 Alexandru-Sergiu Marton@*
> @@ -9732,6 +9732,25 @@ be specified with the @code{#:node} parameter which defaults to
> @code{node}.
> @end defvr
>
> +@defvr {Scheme Variable} tree-sitter-build-system
> +
> +This variable is exported by @code{(guix build-system tree-sitter)}. It
> +implements procedures to compile grammars for the
> +@url{https://tree-sitter.github.io/tree-sitter/, Tree-sitter} parsing
> +library. It essentially runs @code{tree-sitter generate} to translate
> +@code{grammar.js} grammars to JSON and then to C, which it then
> +compiles to native code.
> +
> +Tree-sitter packages may support multiple grammars, so this build system
> +supports a @code{#:grammar-directories} keyword to specify a list of
> +locations where a @code{grammar.js} file may be found.
> +
> +Grammars sometimes depend on each other, such as C++ depending on C and
> +TypeScript depending on JavaScript. You may use inputs to declare such
> +dependencies.
> +
> +@end defvr
> +
> Lastly, for packages that do not need anything as sophisticated, a
> ``trivial'' build system is provided. It is trivial in the sense that
> it provides basically no support: it does not pull any implicit inputs,
> diff --git a/guix/build-system/tree-sitter.scm b/guix/build-system/tree-sitter.scm
> new file mode 100644
> index 0000000000..aeb96e3ef5
> --- /dev/null
> +++ b/guix/build-system/tree-sitter.scm
> @@ -0,0 +1,190 @@
> +;;; GNU Guix --- Functional package management for GNU
> +;;; Copyright © 2022 Pierre Langlois <pierre.langlois@gmx.com>
> +;;;
> +;;; This file is part of GNU Guix.
> +;;;
> +;;; GNU Guix is free software; you can redistribute it and/or modify it
> +;;; under the terms of the GNU General Public License as published by
> +;;; the Free Software Foundation; either version 3 of the License, or (at
> +;;; your option) any later version.
> +;;;
> +;;; GNU Guix is distributed in the hope that it will be useful, but
> +;;; WITHOUT ANY WARRANTY; without even the implied warranty of
> +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +;;; GNU General Public License for more details.
> +;;;
> +;;; You should have received a copy of the GNU General Public License
> +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
> +
> +(define-module (guix build-system tree-sitter)
> + #:use-module (guix store)
> + #:use-module (guix utils)
> + #:use-module (guix packages)
> + #:use-module (guix gexp)
> + #:use-module (guix monads)
> + #:use-module (guix search-paths)
> + #:use-module (guix build-system)
> + #:use-module (guix build-system gnu)
> + #:use-module (guix build-system node)
> + #:use-module (ice-9 match)
> + #:export (%tree-sitter-build-system-modules
> + tree-sitter-build
> + tree-sitter-build-system))
> +
> +(define %tree-sitter-build-system-modules
> + ;; Build-side modules imported by default.
> + `((guix build tree-sitter-build-system)
> + ,@%node-build-system-modules))
> +
> +(define* (lower name
> + #:key source inputs native-inputs outputs system target
> + #:allow-other-keys
> + #:rest arguments)
> + "Return a bag for NAME from the given arguments."
> + (define private-keywords
> + `(#:inputs #:native-inputs #:outputs ,@(if target
> + '()
> + '(#:target))))
> + (define node
> + (module-ref (resolve-interface '(gnu packages node))
> + 'node-lts))
> + (define tree-sitter
> + (module-ref (resolve-interface '(gnu packages tree-sitter))
> + 'tree-sitter))
> + (define tree-sitter-cli
> + (module-ref (resolve-interface '(gnu packages tree-sitter))
> + 'tree-sitter-cli))
> + ;; Grammars depend on each other via JS modules, which we package into a
> + ;; dedicated js output.
> + (define grammar-inputs
> + (map (match-lambda
> + ((name package)
> + `(,name ,package "js")))
> + inputs))
> + (bag
> + (name name)
> + (system system) (target target)
> + (build-inputs `(,@(if source
> + `(("source" ,source))
> + '())
> + ("node" ,node)
> + ("tree-sitter-cli" ,tree-sitter-cli)
> + ,@native-inputs
> + ,@(if target '() grammar-inputs)
> + ;; Keep the standard inputs of 'gnu-build-system'.
> + ,@(if target
> + (standard-cross-packages target 'host)
> + '())
> + ,@(standard-packages)))
> + (host-inputs `(("tree-sitter" ,tree-sitter)
> + ,@(if target grammar-inputs '())))
> + ;; Keep the standard inputs of 'gnu-build-system'.
> + (target-inputs (if target
> + (standard-cross-packages target 'target)
> + '()))
> + ;; XXX: this is a hack to get around issue #41569.
> + (outputs (match outputs
> + (("out") (cons "js" outputs))
> + (_ outputs)))
> + (build (if target tree-sitter-cross-build tree-sitter-build))
> + (arguments (strip-keyword-arguments private-keywords arguments))))
> +
> +(define* (tree-sitter-build name inputs
> + #:key
> + source
> + (phases '%standard-phases)
> + (grammar-directories '("."))
> + (tests? #t)
> + (outputs '("out" "js"))
> + (search-paths '())
> + (system (%current-system))
> + (guile #f)
> + (imported-modules %tree-sitter-build-system-modules)
> + (modules '((guix build utils)
> + (guix build tree-sitter-build-system))))
> + (define builder
> + (with-imported-modules imported-modules
> + #~(begin
> + (use-modules #$@(sexp->gexp modules))
> + (tree-sitter-build #:name #$name
> + #:source #+source
> + #:system #$system
> + #:phases #$phases
> + #:tests? #$tests?
> + #:grammar-directories '#$grammar-directories
> + #:outputs #$(outputs->gexp outputs)
> + #:search-paths '#$(sexp->gexp
> + (map search-path-specification->sexp
> + search-paths))
> + #:inputs #$(input-tuples->gexp inputs)))))
> +
> + (mlet %store-monad ((guile (package->derivation (or guile (default-guile))
> + system #:graft? #f)))
> + (gexp->derivation name builder
> + #:system system
> + #:guile-for-build guile)))
> +
> +(define* (tree-sitter-cross-build name
> + #:key
> + target
> + build-inputs target-inputs host-inputs
> + guile source
> + (phases '%standard-phases)
> + (grammar-directories '("."))
> + (tests? #t)
> + (outputs '("out" "js"))
> + (search-paths '())
> + (native-search-paths '())
> + (system (%current-system))
> + (build (nix-system->gnu-triplet system))
> + (imported-modules %tree-sitter-build-system-modules)
> + (modules '((guix build utils)
> + (guix build tree-sitter-build-system))))
> + (define builder
> + (with-imported-modules imported-modules
> + #~(begin
> + (use-modules #$@(sexp->gexp modules))
> +
> + (define %build-host-inputs
> + #+(input-tuples->gexp build-inputs))
> +
> + (define %build-target-inputs
> + (append #$(input-tuples->gexp host-inputs)
> + #+(input-tuples->gexp target-inputs)))
> +
> + (define %build-inputs
> + (append %build-host-inputs %build-target-inputs))
> +
> + (tree-sitter-build #:name #$name
> + #:source #+source
> + #:system #$system
> + #:build #$build
> + #:target #$target
> + #:phases #$phases
> + #:tests? #$tests?
> + #:grammar-directories '#$grammar-directories
> + #:outputs #$(outputs->gexp outputs)
> + #:inputs %build-target-inputs
> + #:native-inputs %build-host-inputs
> + #:search-paths '#$(sexp->gexp
> + (map search-path-specification->sexp
> + search-paths))
> + #:native-search-paths '#$(sexp->gexp
> + (map
> + search-path-specification->sexp
> + native-search-paths))))))
> +
> + (mlet %store-monad ((guile (package->derivation (or guile (default-guile))
> + system #:graft? #f)))
> + (gexp->derivation name builder
> + #:system system
> + #:target target
> + #:guile-for-build guile)))
> +
> +(define tree-sitter-build-system
> + (build-system
> + (name 'tree-sitter)
> + (description "The Tree-sitter grammar build system")
> + (lower lower)))
> +
> +;;; tree-sitter.scm ends here
> diff --git a/guix/build/tree-sitter-build-system.scm b/guix/build/tree-sitter-build-system.scm
> new file mode 100644
> index 0000000000..574b0f2a1c
> --- /dev/null
> +++ b/guix/build/tree-sitter-build-system.scm
> @@ -0,0 +1,153 @@
> +;;; GNU Guix --- Functional package management for GNU
> +;;; Copyright © 2022 Pierre Langlois <pierre.langlois@gmx.com>
> +;;;
> +;;; This file is part of GNU Guix.
> +;;;
> +;;; GNU Guix is free software; you can redistribute it and/or modify it
> +;;; under the terms of the GNU General Public License as published by
> +;;; the Free Software Foundation; either version 3 of the License, or (at
> +;;; your option) any later version.
> +;;;
> +;;; GNU Guix is distributed in the hope that it will be useful, but
> +;;; WITHOUT ANY WARRANTY; without even the implied warranty of
> +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +;;; GNU General Public License for more details.
> +;;;
> +;;; You should have received a copy of the GNU General Public License
> +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
> +
> +(define-module (guix build tree-sitter-build-system)
> + #:use-module ((guix build node-build-system) #:prefix node:)
> + #:use-module (guix build json)
> + #:use-module (guix build utils)
> + #:use-module (ice-9 match)
> + #:use-module (ice-9 regex)
> + #:use-module (srfi srfi-1)
> + #:export (%standard-phases
> + tree-sitter-build))
> +
> +;; Commentary:
> +;;
> +;; Build procedures for tree-sitter grammar packages. This is the
> +;; builder-side code, which builds on top of the node build-system.
> +;;
> +;; Tree-sitter grammars are written in JavaScript and compiled to a native
> +;; shared object. The `tree-sitter generate' command invokes `node' in order
> +;; to evaluate the grammar.js into a grammar.json file, which is then
> +;; translated into C code. We then compile the C code ourselves. Packages
> +;; also sometimes add extra manually written C/C++ code.
> +;;
> +;; In order to support grammars depending on each other, such as C and C++,
> +;; JavaScript and TypeScript, this build-system installs the source of the
> +;; node module in a dedicated "js" output.
> +;;
> +;; Code:
> +
> +(define* (patch-dependencies #:key inputs #:allow-other-keys)
> + "Rewrite dependencies in 'package.json'. We remove all runtime dependencies
> +and replace development dependencies with tree-sitter grammar node modules."
> +
> + (define (rewrite package.json)
> + (map (match-lambda
> + (("dependencies" @ . _)
> + '("dependencies" @))
> + (("devDependencies" @ . _)
> + `("devDependencies" @
> + ,@(filter-map (match-lambda
> + ((key . directory)
> + (let ((node-module
> + (string-append directory
> + "/lib/node_modules/"
> + key)))
> + (and (directory-exists? node-module)
> + `(,key . ,node-module)))))
> + (alist-delete "node" inputs))))
> + (other other))
> + package.json))
> +
> + (node:with-atomic-json-file-replacement "package.json"
> + (match-lambda
> + (('@ . package.json)
> + (cons '@ (rewrite package.json))))))
> +
> +;; FIXME: The node build-system's configure phase does not support
> +;; cross-compiling so we re-define it.
> +(define* (configure #:key native-inputs inputs #:allow-other-keys)
> + (invoke (search-input-file (or native-inputs inputs) "/bin/npm")
> + "--offline" "--ignore-scripts" "install"))
> +
> +(define* (build #:key grammar-directories #:allow-other-keys)
> + (for-each (lambda (dir)
> + (with-directory-excursion dir
> + ;; Avoid generating binding code for other languages; we do
> + ;; not support this use-case yet and it relies on running
> + ;; `node-gyp' to build native addons.
> + (invoke "tree-sitter" "generate" "--no-bindings")))
> + grammar-directories))
> +
> +(define* (check #:key grammar-directories tests? #:allow-other-keys)
> + (when tests?
> + (for-each (lambda (dir)
> + (with-directory-excursion dir
> + (invoke "tree-sitter" "test")))
> + grammar-directories)))
> +
> +(define* (install #:key target grammar-directories outputs #:allow-other-keys)
> + (let ((lib (string-append (assoc-ref outputs "out")
> + "/lib/tree-sitter")))
> + (mkdir-p lib)
> + (define (compile-language dir)
> + (with-directory-excursion dir
> + (let ((lang (assoc-ref (call-with-input-file "src/grammar.json"
> + read-json)
> + "name"))
> + (source-file (lambda (path)
> + (if (file-exists? path)
> + path
> + #f))))
> + (apply invoke
> + `(,(if target
> + (string-append target "-g++")
> + "g++")
> + "-shared"
> + "-fPIC"
> + "-fno-exceptions"
> + "-O2"
> + "-g"
> + "-o" ,(string-append lib "/" lang ".so")
> + ;; An additional `scanner.{c,cc}' file is sometimes
> + ;; provided.
> + ,@(cond
> + ((source-file "src/scanner.c")
> + => (lambda (file) (list "-xc" "-std=c99" file)))
> + ((source-file "src/scanner.cc")
> + => (lambda (file) (list file)))
> + (else '()))
> + "-xc" "src/parser.c")))))
> + (for-each compile-language grammar-directories)))
> +
> +(define* (install-js #:key native-inputs inputs outputs #:allow-other-keys)
> + (invoke (search-input-file (or native-inputs inputs) "/bin/npm")
> + "--prefix" (assoc-ref outputs "js")
> + "--global"
> + "--offline"
> + "--loglevel" "info"
> + "--production"
> + ;; Skip scripts to prevent building bindings via GYP.
> + "--ignore-scripts"
> + "install" "../package.tgz"))
> +
> +(define %standard-phases
> + (modify-phases node:%standard-phases
> + (replace 'patch-dependencies patch-dependencies)
> + (replace 'configure configure)
> + (replace 'build build)
> + (replace 'check check)
> + (replace 'install install)
> + (add-after 'install 'install-js install-js)))
> +
> +(define* (tree-sitter-build #:key inputs (phases %standard-phases)
> + #:allow-other-keys #:rest args)
> + (apply node:node-build #:inputs inputs #:phases phases args))
> +
> +;;; tree-sitter-build-system.scm ends here