;;; Copyright © 2019 Mark H Weaver ;;; ;;; This program is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, either version 3 of the License, or ;;; (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see . (use-modules (rnrs io ports)) (define (make-custom-textual-output-port id write! get-position set-position! close) (let (;; Allocate a per-port string buffer which will be used as a ;; temporary buffer for decoding, to avoid heap allocation ;; during normal operation. (buffer (make-string 4096)) ;; 'state' is the UTF-8 decoder state, which represents a ;; proper prefix of a well-formed UTF-8 byte sequence. These ;; are bytes that 'binary-write!' has accepted and reported as ;; having been written, although we are not able to decode ;; them into a character to pass to (textual) 'write!' until ;; more bytes arrive. (state 0)) (define (binary-write! bv start count) (call-with-values (lambda () ;; XXX FIXME: Consider performing this ;; decoding strictly. (utf8->string! state bv start (+ start count) buffer 0 (string-length buffer))) (lambda (new-state bv-pos char-count) (let* (;; Avoid calling write! with (char-count = 0) unless ;; (count = 0) was passed to us, because calling ;; 'write!' with count=0 has a special meaning: it ;; means to pass an EOF object to the byte/character ;; sink. (chars-accepted (if (and (zero? char-count) (not (zero? count))) 0 (write! buffer 0 char-count))) ;; Compute 'bytes-accepted' in such a way that the ;; bytes from STATE are not included, because they ;; were passed to us in previous calls, and are not ;; part of the bytevector range that we are now being ;; asked to write. However, it's important to note ;; that if 'write!' did not accept the bytes from ;; STATE, 'bytes-accepted' will be negative. We must ;; handle that case specially below. (bytes-accepted (- count (string-utf8-length (substring buffer chars-accepted char-count))))) ;; If 'bytes-accepted' is negative, that means the bytes ;; from STATE were not written. This can only happen if ;; 'chars-accepted' is 0, because 'write!' can only accept ;; whole code points, and the bytes from STATE are part of ;; at most a single code point. In this case, we must ;; leave STATE unchanged and return 0. (if (negative? bytes-accepted) 0 (begin (set! state new-state) bytes-accepted)))))) (define (binary-close) (set! buffer #f) (when close (close))) (define port (make-custom-binary-output-port id binary-write! get-position set-position! binary-close)) ;; Always use UTF-8 as the encoding for custom textual ports, as ;; an internal implementation detail, to ensure that all Unicode ;; characters will pass through regardless of the current locale. (set-port-encoding! port "UTF-8") port))