From 465d2cb286170933577de045e6e6dad7205bfe10 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Wed, 9 Dec 2020 21:50:21 +0100 Subject: serialization: 'fold-archive' notifies about directory processing completion. * guix/serialization.scm (fold-archive): Call PROC with a 'directory-complete tag when done with a directory. (restore-file): Handle it. * guix/scripts/archive.scm (list-contents): Likewise. * guix/scripts/challenge.scm (archive-contents): Likewise. * tests/nar.scm ("write-file-tree + fold-archive"): Adjust accordingly. --- tests/nar.scm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tests/nar.scm') diff --git a/tests/nar.scm b/tests/nar.scm index aeff3d3330..b542ebd47c 100644 --- a/tests/nar.scm +++ b/tests/nar.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Ludovic Courtès +;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -218,8 +218,10 @@ '(("R" directory #f) ("R/dir" directory #f) ("R/dir/exe" executable "1234") + ("R/dir" directory-complete #f) ("R/foo" regular "abcdefg") - ("R/lnk" symlink "foo")) + ("R/lnk" symlink "foo") + ("R" directory-complete #f)) (let () (define-values (port get-bytevector) -- cgit v1.2.3 From ed7d02f7c198970ce3fe94bcee47592963326446 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Wed, 9 Dec 2020 22:16:35 +0100 Subject: serialization: 'restore-file' sets canonical timestamp and permissions. * guix/serialization.scm (restore-file): Set the permissions and mtime of FILE. * guix/nar.scm (finalize-store-file): Pass #:reset-timestamps? #f to 'register-items'. * tests/nar.scm (rm-rf): Add 'chmod' calls to ensure files are writable. ("write-file + restore-file with symlinks"): Ensure every file in OUTPUT passes 'canonical-file?'. * tests/guix-archive.sh: Run "chmod -R +w" before "rm -rf". --- guix/nar.scm | 8 +++++--- guix/serialization.scm | 14 +++++++++----- tests/guix-archive.sh | 4 ++-- tests/nar.scm | 12 ++++++++++-- 4 files changed, 26 insertions(+), 12 deletions(-) (limited to 'tests/nar.scm') diff --git a/guix/nar.scm b/guix/nar.scm index a23af2e5de..edfcc9aab5 100644 --- a/guix/nar.scm +++ b/guix/nar.scm @@ -114,10 +114,12 @@ held." ;; Install the new TARGET. (rename-file source target) - ;; Register TARGET. As a side effect, it resets the timestamps of all - ;; its files, recursively, and runs a deduplication pass. + ;; Register TARGET. As a side effect, run a deduplication pass. + ;; Timestamps and permissions are already correct thanks to + ;; 'restore-file'. (register-items db - (list (store-info target deriver references)))) + (list (store-info target deriver references)) + #:reset-timestamps? #f)) (when lock? (delete-file (string-append target ".lock")) diff --git a/guix/serialization.scm b/guix/serialization.scm index cc56134ef4..677ca60b66 100644 --- a/guix/serialization.scm +++ b/guix/serialization.scm @@ -459,23 +459,27 @@ depends on TYPE." (define (restore-file port file) "Read a file (possibly a directory structure) in Nar format from PORT. -Restore it as FILE." +Restore it as FILE with canonical permissions and timestamps." (fold-archive (lambda (file type content result) (match type ('directory (mkdir file)) ('directory-complete - #t) + (chmod file #o555) + (utime file 1 1 0 0)) ('symlink - (symlink content file)) + (symlink content file) + (utime file 1 1 0 0 AT_SYMLINK_NOFOLLOW)) ((or 'regular 'executable) (match content ((input . size) (call-with-output-file file (lambda (output) (dump input output size) - (when (eq? type 'executable) - (chmod output #o755))))))))) + (chmod output (if (eq? type 'executable) + #o555 + #o444)))) + (utime file 1 1 0 0)))))) #t port file)) diff --git a/tests/guix-archive.sh b/tests/guix-archive.sh index e796c62f9a..00b87ff0ac 100644 --- a/tests/guix-archive.sh +++ b/tests/guix-archive.sh @@ -1,5 +1,5 @@ # GNU Guix --- Functional package management for GNU -# Copyright © 2013, 2014, 2015, 2019 Ludovic Courtès +# Copyright © 2013, 2014, 2015, 2019, 2020 Ludovic Courtès # # This file is part of GNU Guix. # @@ -28,7 +28,7 @@ tmpdir="t-archive-dir-$$" rm -f "$archive" "$archive_alt" rm -rf "$tmpdir" -trap 'rm -f "$archive" "$archive_alt"; rm -rf "$tmpdir"' EXIT +trap 'rm -f "$archive" "$archive_alt"; chmod -R +w "$tmpdir"; rm -rf "$tmpdir"' EXIT guix archive --export guile-bootstrap > "$archive" guix archive --export guile-bootstrap:out > "$archive_alt" diff --git a/tests/nar.scm b/tests/nar.scm index b542ebd47c..59616659c8 100644 --- a/tests/nar.scm +++ b/tests/nar.scm @@ -136,8 +136,11 @@ (define (rm-rf dir) (file-system-fold (const #t) ; enter? (lambda (file stat result) ; leaf + (unless (eq? 'symlink (stat:type stat)) + (chmod file #o644)) (delete-file file)) - (const #t) ; down + (lambda (dir stat result) ; down + (chmod dir #o755)) (lambda (dir stat result) ; up (rmdir dir)) (const #t) ; skip @@ -363,7 +366,12 @@ (cut write-file input <>)) (call-with-input-file nar (cut restore-file <> output)) - (file-tree-equal? input output)) + + (and (file-tree-equal? input output) + (every (lambda (file) + (canonical-file? + (string-append output "/" file))) + '("root" "root/reg" "root/exe")))) (lambda () (false-if-exception (delete-file nar)) (false-if-exception (rm-rf output))))))) -- cgit v1.2.3 From 2718c29c3fb9f9de2ec897248ad49ae11ca39b7a Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Thu, 10 Dec 2020 11:21:14 +0100 Subject: nar: Deduplicate files right as they are restored. This avoids having to traverse and re-read the files that we have just restored, thereby reducing I/O. * guix/serialization.scm (dump-file): New procedure. (restore-file): Add #:dump-file parameter and honor it. * guix/store/deduplication.scm (tee, dump-file/deduplicate): New procedures. * guix/nar.scm (restore-one-item): Pass #:dump-file to 'restore-file'. (finalize-store-file): Pass #:deduplicate? #f to 'register-items'. * tests/nar.scm : Call 'setenv' to set "NIX_STORE". --- guix/nar.scm | 12 ++++++---- guix/serialization.scm | 27 ++++++++++++++------- guix/store/deduplication.scm | 57 +++++++++++++++++++++++++++++++++++++++++++- tests/nar.scm | 3 +++ 4 files changed, 85 insertions(+), 14 deletions(-) (limited to 'tests/nar.scm') diff --git a/guix/nar.scm b/guix/nar.scm index edfcc9aab5..ba035ca6dc 100644 --- a/guix/nar.scm +++ b/guix/nar.scm @@ -27,6 +27,7 @@ ;; (guix store) since this is "daemon-side" code. #:use-module (guix store) #:use-module (guix store database) + #:use-module ((guix store deduplication) #:select (dump-file/deduplicate)) #:use-module ((guix build store-copy) #:select (store-info)) #:use-module (guix i18n) @@ -114,12 +115,12 @@ held." ;; Install the new TARGET. (rename-file source target) - ;; Register TARGET. As a side effect, run a deduplication pass. - ;; Timestamps and permissions are already correct thanks to - ;; 'restore-file'. + ;; Register TARGET. The 'restore-file' call took care of + ;; deduplication, timestamps, and permissions. (register-items db (list (store-info target deriver references)) - #:reset-timestamps? #f)) + #:reset-timestamps? #f + #:deduplicate? #f)) (when lock? (delete-file (string-append target ".lock")) @@ -212,7 +213,8 @@ s-expression")) (let-values (((port get-hash) (open-sha256-input-port port))) (with-temporary-store-file temp - (restore-file port temp) + (restore-file port temp + #:dump-file dump-file/deduplicate) (let ((magic (read-int port))) (unless (= magic %export-magic) diff --git a/guix/serialization.scm b/guix/serialization.scm index 677ca60b66..9e2dce8bb0 100644 --- a/guix/serialization.scm +++ b/guix/serialization.scm @@ -457,9 +457,22 @@ depends on TYPE." (&message (message "unsupported nar entry type")) (&nar-read-error (port port) (file file) (token x))))))))) -(define (restore-file port file) +(define (dump-file file input size type) + "Dump SIZE bytes from INPUT to FILE." + (call-with-output-file file + (lambda (output) + (dump input output size)))) + +(define* (restore-file port file + #:key (dump-file dump-file)) "Read a file (possibly a directory structure) in Nar format from PORT. -Restore it as FILE with canonical permissions and timestamps." +Restore it as FILE with canonical permissions and timestamps. To write a +regular or executable file, call: + + (DUMP-FILE FILE INPUT SIZE TYPE) + +The default is to dump SIZE bytes from INPUT to FILE, but callers can provide +a custom procedure, for instance to deduplicate FILE on the fly." (fold-archive (lambda (file type content result) (match type ('directory @@ -473,12 +486,10 @@ Restore it as FILE with canonical permissions and timestamps." ((or 'regular 'executable) (match content ((input . size) - (call-with-output-file file - (lambda (output) - (dump input output size) - (chmod output (if (eq? type 'executable) - #o555 - #o444)))) + (dump-file file input size type) + (chmod file (if (eq? type 'executable) + #o555 + #o444)) (utime file 1 1 0 0)))))) #t port diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm index 0655ceb890..b4d37d4525 100644 --- a/guix/store/deduplication.scm +++ b/guix/store/deduplication.scm @@ -26,12 +26,15 @@ #:use-module (guix build syscalls) #:use-module (guix base32) #:use-module (srfi srfi-11) + #:use-module (srfi srfi-34) + #:use-module (srfi srfi-35) #:use-module (rnrs io ports) #:use-module (ice-9 ftw) #:use-module (ice-9 match) #:use-module (guix serialization) #:export (nar-sha256 - deduplicate)) + deduplicate + dump-file/deduplicate)) ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where ;; 'port-position' throws to 'out-of-range' when the offset is great than or @@ -201,3 +204,55 @@ under STORE." ;; that's OK: we just can't deduplicate it more. #f) (else (apply throw args))))))))))) + +(define (tee input len output) + "Return a port that reads up to LEN bytes from INPUT and writes them to +OUTPUT as it goes." + (define bytes-read 0) + + (define (fail) + ;; Reached EOF before we had read LEN bytes from INPUT. + (raise (condition + (&nar-error (port input) + (file (port-filename output)))))) + + (define (read! bv start count) + ;; Read at most LEN bytes in total. + (let ((count (min count (- len bytes-read)))) + (let loop ((ret (get-bytevector-n! input bv start count))) + (cond ((eof-object? ret) + (if (= bytes-read len) + 0 ; EOF + (fail))) + ((and (zero? ret) (> count 0)) + ;; Do not return zero since zero means EOF, so try again. + (loop (get-bytevector-n! input bv start count))) + (else + (put-bytevector output bv start ret) + (set! bytes-read (+ bytes-read ret)) + ret))))) + + (make-custom-binary-input-port "tee input port" read! #f #f #f)) + +(define* (dump-file/deduplicate file input size type + #:key (store (%store-directory))) + "Write SIZE bytes read from INPUT to FILE. TYPE is a symbol, either +'regular or 'executable. + +This procedure is suitable as a #:dump-file argument to 'restore-file'. When +used that way, it deduplicates files on the fly as they are restored, thereby +removing the need to a deduplication pass that would re-read all the files +down the road." + (define hash + (call-with-output-file file + (lambda (output) + (let-values (((hash-port get-hash) + (open-hash-port (hash-algorithm sha256)))) + (write-file-tree file hash-port + #:file-type+size (lambda (_) (values type size)) + #:file-port + (const (tee input size output))) + (close-port hash-port) + (get-hash))))) + + (deduplicate file hash #:store store)) diff --git a/tests/nar.scm b/tests/nar.scm index 59616659c8..ba4881caaa 100644 --- a/tests/nar.scm +++ b/tests/nar.scm @@ -452,6 +452,9 @@ (false-if-exception (rm-rf %test-dir)) (setlocale LC_ALL locale))))) +;; XXX: Tell the 'deduplicate' procedure what store we're actually using. +(setenv "NIX_STORE" (%store-prefix)) + (test-assert "restore-file-set (signed, valid)" (with-store store (let* ((texts (unfold (cut >= <> 10) -- cgit v1.2.3