From 6a060ff27ff68384d7c90076baa36c349fff689d Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Thu, 10 Dec 2020 15:12:34 +0100 Subject: store-copy: 'populate-store' can optionally deduplicate files. Until now deduplication was performed as an additional pass after copying files, which involve re-traversing all the files that had just been copied. * guix/store/deduplication.scm (copy-file/deduplicate): New procedure. * tests/store-deduplication.scm ("copy-file/deduplicate"): New test. * guix/build/store-copy.scm (populate-store): Add #:deduplicate? parameter and honor it. * tests/gexp.scm ("gexp->derivation, store copy"): Pass #:deduplicate? #f to 'populate-store'. * gnu/build/image.scm (initialize-root-partition): Pass #:deduplicate? to 'populate-store'. Pass #:deduplicate? #f to 'register-closure'. * gnu/build/vm.scm (root-partition-initializer): Likewise. * gnu/build/install.scm (populate-single-profile-directory): Pass #:deduplicate? #f to 'populate-store'. * gnu/build/linux-initrd.scm (build-initrd): Likewise. * guix/scripts/pack.scm (self-contained-tarball)[import-module?]: New procedure. [build]: Pass it as an argument to 'source-module-closure'. * guix/scripts/pack.scm (squashfs-image)[build]: Wrap in 'with-extensions'. * gnu/system/linux-initrd.scm (expression->initrd)[import-module?]: New procedure. [builder]: Pass it to 'source-module-closure'. * gnu/system/install.scm (cow-store-service-type)[import-module?]: New procedure. Pass it to 'source-module-closure'. --- tests/store-deduplication.scm | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tests/store-deduplication.scm') diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm index e2870a363d..7b01acae24 100644 --- a/tests/store-deduplication.scm +++ b/tests/store-deduplication.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2018 Ludovic Courtès +;;; Copyright © 2018, 2020 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -25,6 +25,7 @@ #:use-module (rnrs bytevectors) #:use-module (ice-9 binary-ports) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) #:use-module (srfi srfi-64)) (test-begin "store-deduplication") @@ -106,4 +107,19 @@ (cons (apply = (map (compose stat:ino stat) identical)) (map (compose stat:nlink stat) identical)))))) +(test-assert "copy-file/deduplicate" + (call-with-temporary-directory + (lambda (store) + (let ((source (search-path %load-path "gnu/packages/emacs-xyz.scm"))) + (for-each (lambda (target) + (copy-file/deduplicate source + (string-append store target) + #:store store)) + '("/a" "/b" "/c")) + (and (directory-exists? (string-append store "/.links")) + (file=? source (string-append store "/a")) + (apply = (map (compose stat:ino stat + (cut string-append store <>)) + '("/a" "/b" "/c")))))))) + (test-end "store-deduplication") -- cgit v1.2.3 From 7530e491b517497b7b8166b5ccecdc3d4cdb468d Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 11 Dec 2020 15:48:02 +0100 Subject: deduplicate: Create the '.links' directory lazily. This avoids repeated (mkdir-p "/gnu/store/.links") calls when deduplicating lots of files. * guix/store/deduplication.scm (deduplicate): Remove initial call to 'mkdir-p'. Add ENOENT case in 'link' exception handler. Reindent. * tests/store-deduplication.scm ("deduplicate, ENOSPC"): Check for (<= links 4) to account for the initial 'link' call. --- guix/store/deduplication.scm | 96 ++++++++++++++++++++++--------------------- tests/store-deduplication.scm | 2 +- 2 files changed, 51 insertions(+), 47 deletions(-) (limited to 'tests/store-deduplication.scm') diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm index 8564f12107..a72a43bf79 100644 --- a/guix/store/deduplication.scm +++ b/guix/store/deduplication.scm @@ -159,52 +159,56 @@ under STORE." (define links-directory (string-append store "/.links")) - (mkdir-p links-directory) - (let loop ((path path) - (type (stat:type (lstat path))) - (hash hash)) - (if (eq? 'directory type) - ;; Can't hardlink directories, so hardlink their atoms. - (for-each (match-lambda - ((file . properties) - (unless (member file '("." "..")) - (let* ((file (string-append path "/" file)) - (type (match (assoc-ref properties 'type) - ((or 'unknown #f) - (stat:type (lstat file))) - (type type)))) - (loop file type - (and (not (eq? 'directory type)) - (nar-sha256 file))))))) - (scandir* path)) - (let ((link-file (string-append links-directory "/" - (bytevector->nix-base32-string hash)))) - (if (file-exists? link-file) - (replace-with-link link-file path - #:swap-directory links-directory - #:store store) - (catch 'system-error - (lambda () - (link path link-file)) - (lambda args - (let ((errno (system-error-errno args))) - (cond ((= errno EEXIST) - ;; Someone else put an entry for PATH in - ;; LINKS-DIRECTORY before we could. Let's use it. - (replace-with-link path link-file - #:swap-directory - links-directory - #:store store)) - ((= errno ENOSPC) - ;; There's not enough room in the directory index for - ;; more entries in .links, but that's fine: we can - ;; just stop. - #f) - ((= errno EMLINK) - ;; PATH has reached the maximum number of links, but - ;; that's OK: we just can't deduplicate it more. - #f) - (else (apply throw args))))))))))) + (let loop ((path path) + (type (stat:type (lstat path))) + (hash hash)) + (if (eq? 'directory type) + ;; Can't hardlink directories, so hardlink their atoms. + (for-each (match-lambda + ((file . properties) + (unless (member file '("." "..")) + (let* ((file (string-append path "/" file)) + (type (match (assoc-ref properties 'type) + ((or 'unknown #f) + (stat:type (lstat file))) + (type type)))) + (loop file type + (and (not (eq? 'directory type)) + (nar-sha256 file))))))) + (scandir* path)) + (let ((link-file (string-append links-directory "/" + (bytevector->nix-base32-string hash)))) + (if (file-exists? link-file) + (replace-with-link link-file path + #:swap-directory links-directory + #:store store) + (catch 'system-error + (lambda () + (link path link-file)) + (lambda args + (let ((errno (system-error-errno args))) + (cond ((= errno EEXIST) + ;; Someone else put an entry for PATH in + ;; LINKS-DIRECTORY before we could. Let's use it. + (replace-with-link path link-file + #:swap-directory + links-directory + #:store store)) + ((= errno ENOENT) + ;; This most likely means that LINKS-DIRECTORY does + ;; not exist. Attempt to create it and try again. + (mkdir-p links-directory) + (loop path type hash)) + ((= errno ENOSPC) + ;; There's not enough room in the directory index for + ;; more entries in .links, but that's fine: we can + ;; just stop. + #f) + ((= errno EMLINK) + ;; PATH has reached the maximum number of links, but + ;; that's OK: we just can't deduplicate it more. + #f) + (else (apply throw args))))))))))) (define (tee input len output) "Return a port that reads up to LEN bytes from INPUT and writes them to diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm index 7b01acae24..b1c2d93bbd 100644 --- a/tests/store-deduplication.scm +++ b/tests/store-deduplication.scm @@ -95,7 +95,7 @@ (lambda () (set! link (lambda (old new) (set! links (+ links 1)) - (if (<= links 3) + (if (<= links 4) (true-link old new) (throw 'system-error "link" "~A" '("Whaaat?!") (list ENOSPC)))))) -- cgit v1.2.3