From 5817e222faf46f76fbdb66ba8fd6c8cd643aefb5 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Wed, 20 Jul 2022 19:11:21 +0200 Subject: style: Move reader and printer to (guix read-print). * guix/scripts/style.scm (, read-with-comments) (vhashq, %special-forms, %newline-forms, prefix?) (special-form-lead, newline-form?, escaped-string) (string-width, canonicalize-comment, pretty-print-with-comments) (object->string*): Move to... * guix/read-print.scm: ... here. New file. * guix/scripts/import.scm: Adjust accordingly. * tests/style.scm: Move 'test-pretty-print' and tests to... * tests/read-print.scm: ... here. New file. * Makefile.am (MODULES): Add 'guix/read-print.scm'. (SCM_TESTS): Add 'tests/read-print.scm'. --- guix/read-print.scm | 490 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 490 insertions(+) create mode 100644 guix/read-print.scm (limited to 'guix/read-print.scm') diff --git a/guix/read-print.scm b/guix/read-print.scm new file mode 100644 index 0000000000..69ab8ac8b3 --- /dev/null +++ b/guix/read-print.scm @@ -0,0 +1,490 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2021-2022 Ludovic Courtès +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see . + +(define-module (guix read-print) + #:use-module (ice-9 control) + #:use-module (ice-9 match) + #:use-module (ice-9 rdelim) + #:use-module (ice-9 vlist) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9) + #:export (pretty-print-with-comments + read-with-comments + object->string* + + comment? + comment->string + comment-margin? + canonicalize-comment)) + +;;; Commentary: +;;; +;;; This module provides a comment-preserving reader and a comment-preserving +;;; pretty-printer smarter than (ice-9 pretty-print). +;;; +;;; Code: + + +;;; +;;; Comment-preserving reader. +;;; + +;; A comment. +(define-record-type + (comment str margin?) + comment? + (str comment->string) + (margin? comment-margin?)) + +(define (read-with-comments port) + "Like 'read', but include objects when they're encountered." + ;; Note: Instead of implementing this functionality in 'read' proper, which + ;; is the best approach long-term, this code is a layer on top of 'read', + ;; such that we don't have to rely on a specific Guile version. + (define dot (list 'dot)) + (define (dot? x) (eq? x dot)) + + (define (reverse/dot lst) + ;; Reverse LST and make it an improper list if it contains DOT. + (let loop ((result '()) + (lst lst)) + (match lst + (() result) + (((? dot?) . rest) + (let ((dotted (reverse rest))) + (set-cdr! (last-pair dotted) (car result)) + dotted)) + ((x . rest) (loop (cons x result) rest))))) + + (let loop ((blank-line? #t) + (return (const 'unbalanced))) + (match (read-char port) + ((? eof-object? eof) + eof) ;oops! + (chr + (cond ((eqv? chr #\newline) + (loop #t return)) + ((char-set-contains? char-set:whitespace chr) + (loop blank-line? return)) + ((memv chr '(#\( #\[)) + (let/ec return + (let liip ((lst '())) + (liip (cons (loop (match lst + (((? comment?) . _) #t) + (_ #f)) + (lambda () + (return (reverse/dot lst)))) + lst))))) + ((memv chr '(#\) #\])) + (return)) + ((eq? chr #\') + (list 'quote (loop #f return))) + ((eq? chr #\`) + (list 'quasiquote (loop #f return))) + ((eq? chr #\,) + (list (match (peek-char port) + (#\@ + (read-char port) + 'unquote-splicing) + (_ + 'unquote)) + (loop #f return))) + ((eqv? chr #\;) + (unread-char chr port) + (comment (read-line port 'concat) + (not blank-line?))) + (else + (unread-char chr port) + (match (read port) + ((and token '#{.}#) + (if (eq? chr #\.) dot token)) + (token token)))))))) + +;;; +;;; Comment-preserving pretty-printer. +;;; + +(define-syntax vhashq + (syntax-rules (quote) + ((_) vlist-null) + ((_ (key (quote (lst ...))) rest ...) + (vhash-consq key '(lst ...) (vhashq rest ...))) + ((_ (key value) rest ...) + (vhash-consq key '((() . value)) (vhashq rest ...))))) + +(define %special-forms + ;; Forms that are indented specially. The number is meant to be understood + ;; like Emacs' 'scheme-indent-function' symbol property. When given an + ;; alist instead of a number, the alist gives "context" in which the symbol + ;; is a special form; for instance, context (modify-phases) means that the + ;; symbol must appear within a (modify-phases ...) expression. + (vhashq + ('begin 1) + ('lambda 2) + ('lambda* 2) + ('match-lambda 1) + ('match-lambda* 2) + ('define 2) + ('define* 2) + ('define-public 2) + ('define*-public 2) + ('define-syntax 2) + ('define-syntax-rule 2) + ('define-module 2) + ('define-gexp-compiler 2) + ('let 2) + ('let* 2) + ('letrec 2) + ('letrec* 2) + ('match 2) + ('when 2) + ('unless 2) + ('package 1) + ('origin 1) + ('operating-system 1) + ('modify-inputs 2) + ('modify-phases 2) + ('add-after '(((modify-phases) . 3))) + ('add-before '(((modify-phases) . 3))) + ('replace '(((modify-phases) . 2))) ;different from 'modify-inputs' + ('substitute* 2) + ('substitute-keyword-arguments 2) + ('call-with-input-file 2) + ('call-with-output-file 2) + ('with-output-to-file 2) + ('with-input-from-file 2))) + +(define %newline-forms + ;; List heads that must be followed by a newline. The second argument is + ;; the context in which they must appear. This is similar to a special form + ;; of 1, except that indent is 1 instead of 2 columns. + (vhashq + ('arguments '(package)) + ('sha256 '(origin source package)) + ('base32 '(sha256 origin)) + ('git-reference '(uri origin source)) + ('search-paths '(package)) + ('native-search-paths '(package)) + ('search-path-specification '()))) + +(define (prefix? candidate lst) + "Return true if CANDIDATE is a prefix of LST." + (let loop ((candidate candidate) + (lst lst)) + (match candidate + (() #t) + ((head1 . rest1) + (match lst + (() #f) + ((head2 . rest2) + (and (equal? head1 head2) + (loop rest1 rest2)))))))) + +(define (special-form-lead symbol context) + "If SYMBOL is a special form in the given CONTEXT, return its number of +arguments; otherwise return #f. CONTEXT is a stack of symbols lexically +surrounding SYMBOL." + (match (vhash-assq symbol %special-forms) + (#f #f) + ((_ . alist) + (any (match-lambda + ((prefix . level) + (and (prefix? prefix context) (- level 1)))) + alist)))) + +(define (newline-form? symbol context) + "Return true if parenthesized expressions starting with SYMBOL must be +followed by a newline." + (match (vhash-assq symbol %newline-forms) + (#f #f) + ((_ . prefix) + (prefix? prefix context)))) + +(define (escaped-string str) + "Return STR with backslashes and double quotes escaped. Everything else, in +particular newlines, is left as is." + (list->string + `(#\" + ,@(string-fold-right (lambda (chr lst) + (match chr + (#\" (cons* #\\ #\" lst)) + (#\\ (cons* #\\ #\\ lst)) + (_ (cons chr lst)))) + '() + str) + #\"))) + +(define (string-width str) + "Return the \"width\" of STR--i.e., the width of the longest line of STR." + (apply max (map string-length (string-split str #\newline)))) + +(define (canonicalize-comment c) + "Canonicalize comment C, ensuring it has the \"right\" number of leading +semicolons." + (let ((line (string-trim-both + (string-trim (comment->string c) (char-set #\;))))) + (comment (string-append + (if (comment-margin? c) + ";" + (if (string-null? line) + ";;" ;no trailing space + ";; ")) + line "\n") + (comment-margin? c)))) + +(define* (pretty-print-with-comments port obj + #:key + (format-comment identity) + (indent 0) + (max-width 78) + (long-list 5)) + "Pretty-print OBJ to PORT, attempting to at most MAX-WIDTH character columns +and assuming the current column is INDENT. Comments present in OBJ are +included in the output. + +Lists longer than LONG-LIST are written as one element per line. Comments are +passed through FORMAT-COMMENT before being emitted; a useful value for +FORMAT-COMMENT is 'canonicalize-comment'." + (define (list-of-lists? head tail) + ;; Return true if HEAD and TAIL denote a list of lists--e.g., a list of + ;; 'let' bindings. + (match head + ((thing _ ...) ;proper list + (and (not (memq thing + '(quote quasiquote unquote unquote-splicing))) + (pair? tail))) + (_ #f))) + + (let loop ((indent indent) + (column indent) + (delimited? #t) ;true if comes after a delimiter + (context '()) ;list of "parent" symbols + (obj obj)) + (define (print-sequence context indent column lst delimited?) + (define long? + (> (length lst) long-list)) + + (let print ((lst lst) + (first? #t) + (delimited? delimited?) + (column column)) + (match lst + (() + column) + ((item . tail) + (define newline? + ;; Insert a newline if ITEM is itself a list, or if TAIL is long, + ;; but only if ITEM is not the first item. Also insert a newline + ;; before a keyword. + (and (or (pair? item) long? + (and (keyword? item) + (not (eq? item #:allow-other-keys)))) + (not first?) (not delimited?) + (not (comment? item)))) + + (when newline? + (newline port) + (display (make-string indent #\space) port)) + (let ((column (if newline? indent column))) + (print tail + (keyword? item) ;keep #:key value next to one another + (comment? item) + (loop indent column + (or newline? delimited?) + context + item))))))) + + (define (sequence-would-protrude? indent lst) + ;; Return true if elements of LST written at INDENT would protrude + ;; beyond MAX-WIDTH. This is implemented as a cheap test with false + ;; negatives to avoid actually rendering all of LST. + (find (match-lambda + ((? string? str) + (>= (+ (string-width str) 2 indent) max-width)) + ((? symbol? symbol) + (>= (+ (string-width (symbol->string symbol)) indent) + max-width)) + ((? boolean?) + (>= (+ 2 indent) max-width)) + (() + (>= (+ 2 indent) max-width)) + (_ ;don't know + #f)) + lst)) + + (define (special-form? head) + (special-form-lead head context)) + + (match obj + ((? comment? comment) + (if (comment-margin? comment) + (begin + (display " " port) + (display (comment->string (format-comment comment)) + port)) + (begin + ;; When already at the beginning of a line, for example because + ;; COMMENT follows a margin comment, no need to emit a newline. + (unless (= column indent) + (newline port) + (display (make-string indent #\space) port)) + (display (comment->string (format-comment comment)) + port))) + (display (make-string indent #\space) port) + indent) + (('quote lst) + (unless delimited? (display " " port)) + (display "'" port) + (loop indent (+ column (if delimited? 1 2)) #t context lst)) + (('quasiquote lst) + (unless delimited? (display " " port)) + (display "`" port) + (loop indent (+ column (if delimited? 1 2)) #t context lst)) + (('unquote lst) + (unless delimited? (display " " port)) + (display "," port) + (loop indent (+ column (if delimited? 1 2)) #t context lst)) + (('unquote-splicing lst) + (unless delimited? (display " " port)) + (display ",@" port) + (loop indent (+ column (if delimited? 2 3)) #t context lst)) + (('gexp lst) + (unless delimited? (display " " port)) + (display "#~" port) + (loop indent (+ column (if delimited? 2 3)) #t context lst)) + (('ungexp obj) + (unless delimited? (display " " port)) + (display "#$" port) + (loop indent (+ column (if delimited? 2 3)) #t context obj)) + (('ungexp-native obj) + (unless delimited? (display " " port)) + (display "#+" port) + (loop indent (+ column (if delimited? 2 3)) #t context obj)) + (('ungexp-splicing lst) + (unless delimited? (display " " port)) + (display "#$@" port) + (loop indent (+ column (if delimited? 3 4)) #t context lst)) + (('ungexp-native-splicing lst) + (unless delimited? (display " " port)) + (display "#+@" port) + (loop indent (+ column (if delimited? 3 4)) #t context lst)) + (((? special-form? head) arguments ...) + ;; Special-case 'let', 'lambda', 'modify-inputs', etc. so the second + ;; and following arguments are less indented. + (let* ((lead (special-form-lead head context)) + (context (cons head context)) + (head (symbol->string head)) + (total (length arguments))) + (unless delimited? (display " " port)) + (display "(" port) + (display head port) + (unless (zero? lead) + (display " " port)) + + ;; Print the first LEAD arguments. + (let* ((indent (+ column 2 + (if delimited? 0 1))) + (column (+ column 1 + (if (zero? lead) 0 1) + (if delimited? 0 1) + (string-length head))) + (initial-indent column)) + (define new-column + (let inner ((n lead) + (arguments (take arguments (min lead total))) + (column column)) + (if (zero? n) + (begin + (newline port) + (display (make-string indent #\space) port) + indent) + (match arguments + (() column) + ((head . tail) + (inner (- n 1) tail + (loop initial-indent column + (= n lead) + context + head))))))) + + ;; Print the remaining arguments. + (let ((column (print-sequence + context indent new-column + (drop arguments (min lead total)) + #t))) + (display ")" port) + (+ column 1))))) + ((head tail ...) + (let* ((overflow? (>= column max-width)) + (column (if overflow? + (+ indent 1) + (+ column (if delimited? 1 2)))) + (newline? (or (newline-form? head context) + (list-of-lists? head tail))) ;'let' bindings + (context (cons head context))) + (if overflow? + (begin + (newline port) + (display (make-string indent #\space) port)) + (unless delimited? (display " " port))) + (display "(" port) + + (let* ((new-column (loop column column #t context head)) + (indent (if (or (>= new-column max-width) + (not (symbol? head)) + (sequence-would-protrude? + (+ new-column 1) tail) + newline?) + column + (+ new-column 1)))) + (when newline? + ;; Insert a newline right after HEAD. + (newline port) + (display (make-string indent #\space) port)) + + (let ((column + (print-sequence context indent + (if newline? indent new-column) + tail newline?))) + (display ")" port) + (+ column 1))))) + (_ + (let* ((str (if (string? obj) + (escaped-string obj) + (object->string obj))) + (len (string-width str))) + (if (and (> (+ column 1 len) max-width) + (not delimited?)) + (begin + (newline port) + (display (make-string indent #\space) port) + (display str port) + (+ indent len)) + (begin + (unless delimited? (display " " port)) + (display str port) + (+ column (if delimited? 0 1) len)))))))) + +(define (object->string* obj indent . args) + "Pretty-print OBJ with INDENT columns as the initial indent. ARGS are +passed as-is to 'pretty-print-with-comments'." + (call-with-output-string + (lambda (port) + (apply pretty-print-with-comments port obj + #:indent indent + args)))) -- cgit v1.2.3