summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2018-10-09 09:53:02 +0200
committerLudovic Courtès <ludo@gnu.org>2018-10-09 18:54:12 +0200
commitfe17037b387c6eca0c45f0526d2761e982a192bb (patch)
tree72a5117986a4f5d983c9e3bf360ad0d8845eddd4
parent276f368051ff52cf202ede9fce579e49d9d744ec (diff)
downloadguix-patches-fe17037b387c6eca0c45f0526d2761e982a192bb.tar
guix-patches-fe17037b387c6eca0c45f0526d2761e982a192bb.tar.gz
status: Gracefully handle invalid UTF-8 in build logs.
* guix/status.scm (maybe-utf8->string): New procedure. (build-event-output-port): Use it in lieu of 'utf8->string'. * tests/status.scm ("build-output-port, UTF-8") ("current-build-output-port, UTF-8 + garbage"): New tests.
-rw-r--r--guix/status.scm19
-rw-r--r--tests/status.scm22
2 files changed, 39 insertions, 2 deletions
diff --git a/guix/status.scm b/guix/status.scm
index c6956066fd..13537c70cd 100644
--- a/guix/status.scm
+++ b/guix/status.scm
@@ -34,6 +34,7 @@
#:use-module (ice-9 match)
#:use-module (ice-9 format)
#:use-module (ice-9 binary-ports)
+ #:autoload (ice-9 rdelim) (read-string)
#:use-module (rnrs bytevectors)
#:use-module ((system foreign)
#:select (bytevector->pointer pointer->bytevector))
@@ -429,6 +430,22 @@ ON-CHANGE can display the build status, build events, etc."
(define %newline
(char-set #\return #\newline))
+(define (maybe-utf8->string bv)
+ "Attempt to decode BV as UTF-8 string and return it. Gracefully handle the
+case where BV does not contain only valid UTF-8."
+ (catch 'decoding-error
+ (lambda ()
+ (utf8->string bv))
+ (lambda _
+ ;; This is the sledgehammer but it's the only safe way we have to
+ ;; properly handle this. It's expensive but it's rarely needed.
+ (let ((port (open-bytevector-input-port bv)))
+ (set-port-encoding! port "UTF-8")
+ (set-port-conversion-strategy! port 'substitute)
+ (let ((str (read-string port)))
+ (close-port port)
+ str)))))
+
(define* (build-event-output-port proc #:optional (seed (build-status)))
"Return an output port for use as 'current-build-output-port' that calls
PROC with its current state value, initialized with SEED, on every build
@@ -464,7 +481,7 @@ The second return value is a thunk to retrieve the current state."
(pointer->bytevector ptr count)))
(define (write! bv offset count)
- (let loop ((str (utf8->string (bytevector-range bv offset count))))
+ (let loop ((str (maybe-utf8->string (bytevector-range bv offset count))))
(match (string-index str %newline)
((? integer? cr)
(let ((tail (string-take str (+ 1 cr))))
diff --git a/tests/status.scm b/tests/status.scm
index 04dedb702c..486ad04dd2 100644
--- a/tests/status.scm
+++ b/tests/status.scm
@@ -20,7 +20,9 @@
#:use-module (guix status)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
- #:use-module (srfi srfi-64))
+ #:use-module (srfi srfi-64)
+ #:use-module (rnrs bytevectors)
+ #:use-module (rnrs io ports))
(test-begin "status")
@@ -112,4 +114,22 @@
(display "@ substituter-succeeded baz\n" port)
(list first (get-status)))))
+(test-equal "build-output-port, UTF-8"
+ '((build-log "lambda is λ!\n"))
+ (let-values (((port get-status) (build-event-output-port cons '()))
+ ((bv) (string->utf8 "lambda is λ!\n")))
+ (put-bytevector port bv)
+ (force-output port)
+ (get-status)))
+
+(test-equal "current-build-output-port, UTF-8 + garbage"
+ ;; What about a mixture of UTF-8 + garbage?
+ '((build-log "garbage: �lambda: λ\n"))
+ (let-values (((port get-status) (build-event-output-port cons '())))
+ (display "garbage: " port)
+ (put-bytevector port #vu8(128))
+ (put-bytevector port (string->utf8 "lambda: λ\n"))
+ (force-output port)
+ (get-status)))
+
(test-end "status")