summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--guix/build/utils.scm57
-rw-r--r--tests/build-utils.scm24
2 files changed, 69 insertions, 12 deletions
diff --git a/guix/build/utils.scm b/guix/build/utils.scm
index f0ea83085e..76180e67e0 100644
--- a/guix/build/utils.scm
+++ b/guix/build/utils.scm
@@ -2,7 +2,7 @@
;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2013 Andreas Enge <andreas@enge.fr>
;;; Copyright © 2013 Nikita Karetnikov <nikita@karetnikov.org>
-;;; Copyright © 2015, 2018 Mark H Weaver <mhw@netris.org>
+;;; Copyright © 2015, 2018, 2021 Mark H Weaver <mhw@netris.org>
;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
@@ -784,6 +784,31 @@ PROC's result is returned."
(lambda (key . args)
(false-if-exception (delete-file template))))))
+(define (unused-private-use-code-point s)
+ "Find a code point within a Unicode Private Use Area that is not
+present in S, and return the corresponding character object. If one
+cannot be found, return false."
+ (define (scan lo hi)
+ (and (<= lo hi)
+ (let ((c (integer->char lo)))
+ (if (string-index s c)
+ (scan (+ lo 1) hi)
+ c))))
+ (or (scan #xE000 #xF8FF)
+ (scan #xF0000 #xFFFFD)
+ (scan #x100000 #x10FFFD)))
+
+(define (replace-char c1 c2 s)
+ "Return a string which is equal to S except with all instances of C1
+replaced by C2. If C1 and C2 are equal, return S."
+ (if (char=? c1 c2)
+ s
+ (string-map (lambda (c)
+ (if (char=? c c1)
+ c2
+ c))
+ s)))
+
(define (substitute file pattern+procs)
"PATTERN+PROCS is a list of regexp/two-argument-procedure pairs. For each
line of FILE, and for each PATTERN that it matches, call the corresponding
@@ -802,16 +827,26 @@ end of a line; by itself it won't match the terminating newline of a line."
(let loop ((line (read-line in 'concat)))
(if (eof-object? line)
#t
- (let ((line (fold (lambda (r+p line)
- (match r+p
- ((regexp . proc)
- (match (list-matches regexp line)
- ((and m+ (_ _ ...))
- (proc line m+))
- (_ line)))))
- line
- rx+proc)))
- (display line out)
+ ;; Work around the fact that Guile's regexp-exec does not handle
+ ;; NUL characters (a limitation of the underlying GNU libc's
+ ;; regexec) by temporarily replacing them by an unused private
+ ;; Unicode code point.
+ ;; TODO: Use SRFI-115 instead, once available in Guile.
+ (let* ((nul* (or (and (string-index line #\nul)
+ (unused-private-use-code-point line))
+ #\nul))
+ (line* (replace-char #\nul nul* line))
+ (line1* (fold (lambda (r+p line)
+ (match r+p
+ ((regexp . proc)
+ (match (list-matches regexp line)
+ ((and m+ (_ _ ...))
+ (proc line m+))
+ (_ line)))))
+ line*
+ rx+proc))
+ (line1 (replace-char nul* #\nul line1*)))
+ (display line1 out)
(loop (read-line in 'concat)))))))))
diff --git a/tests/build-utils.scm b/tests/build-utils.scm
index 654b480ed9..31be7ff80f 100644
--- a/tests/build-utils.scm
+++ b/tests/build-utils.scm
@@ -1,6 +1,7 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2012, 2015, 2016, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -18,7 +19,7 @@
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
-(define-module (test-build-utils)
+(define-module (test build-utils)
#:use-module (guix tests)
#:use-module (guix build utils)
#:use-module ((guix utils)
@@ -241,4 +242,25 @@ print('hello world')"))
"/some/other/path")))
#f)))))
+(test-equal "substitute*, text contains a NUL byte, UTF-8"
+ "c\0d"
+ (with-fluids ((%default-port-encoding "UTF-8")
+ (%default-port-conversion-strategy 'error))
+ ;; The GNU libc is locale sensitive. Depending on the value of LANG, the
+ ;; test could fail with "string contains #\\nul character: ~S" or "cannot
+ ;; convert wide string to output locale".
+ (setlocale LC_ALL "en_US.UTF-8")
+ (call-with-temporary-output-file
+ (lambda (file port)
+ (format port "a\0b")
+ (flush-output-port port)
+
+ (substitute* file
+ (("a") "c")
+ (("b") "d"))
+
+ (with-input-from-file file
+ (lambda _
+ (get-string-all (current-input-port))))))))
+
(test-end)