From b20cd80ff1f3c9eb988a0cc27ed9538b68914608 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Thu, 28 Oct 2021 22:14:40 +0200 Subject: import: pypi: Allow imports of a specific version. * guix/import/pypi.scm (latest-version): New procedure. (latest-source-release): Rename to... (source-release): ... this. Add 'version' parameter. (latest-wheel-release): Rename to... (wheel-release): ... this. Add 'version' parameter. (pypi->guix-package): Honor 'version' parameter. (pypi-recursive-import): Add 'version' parameter and honor it. * guix/scripts/import/pypi.scm (guix-import-pypi): Expect a spec. Pass it to 'package-name->name+version'. Pass the 'version' parameter. * tests/pypi.scm ("pypi->guix-package, no wheel"): Exercise the #:version parameter. * doc/guix.texi (Invoking guix import): Document it. --- tests/pypi.scm | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tests') diff --git a/tests/pypi.scm b/tests/pypi.scm index 70f4298a90..ad869ac31f 100644 --- a/tests/pypi.scm +++ b/tests/pypi.scm @@ -260,9 +260,15 @@ Requires-Dist: pytest (>=3.1.0); extra == 'testing' ('synopsis "summary") ('description "summary") ('license 'license:lgpl2.0)) - (string=? (bytevector->nix-base32-string - test-source-hash) - hash)) + (and (string=? (bytevector->nix-base32-string + test-source-hash) + hash) + (equal? (pypi->guix-package "foo" #:version "1.0.0") + (pypi->guix-package "foo")) + (catch 'quit + (lambda () + (pypi->guix-package "foo" #:version "42")) + (const #t)))) (x (pk 'fail x #f)))))) -- cgit v1.2.3 From 04d929570ad816793d7e0024a11314124ce87f98 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 29 Oct 2021 21:51:39 +0200 Subject: import: print: Properly render packages with origins as inputs. * guix/import/print.scm (package->code)[source->code]: Check whether VERSION is true before calling 'factorize-uri'. [package-lists->code]: Add clause for inputs that are origins. * tests/print.scm (pkg-with-origin-input, pkg-with-origin-input-source): New variables. ("package with origin input"): New test. --- guix/import/print.scm | 14 +++++++++----- tests/print.scm | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'tests') diff --git a/guix/import/print.scm b/guix/import/print.scm index 0310739b3a..8acf5d52f6 100644 --- a/guix/import/print.scm +++ b/guix/import/print.scm @@ -89,9 +89,11 @@ when evaluated." (guix hg-download) (guix svn-download))) (procedure-name method))) - (uri (string-append ,@(match (factorize-uri uri version) - ((? string? uri) (list uri)) - (factorized factorized)))) + (uri ,(if version + `(string-append ,@(match (factorize-uri uri version) + ((? string? uri) (list uri)) + (factorized factorized))) + uri)) ,(if (equal? (content-hash-algorithm hash) 'sha256) `(sha256 (base32 ,(bytevector->nix-base32-string (content-hash-value hash)))) @@ -109,7 +111,7 @@ when evaluated." (map (match-lambda ((? symbol? s) (list (symbol->string s) (list 'unquote s))) - ((label pkg . out) + ((label (? package? pkg) . out) (let ((mod (package-module-name pkg))) (cons* label ;; FIXME: using '@ certainly isn't pretty, but it @@ -117,7 +119,9 @@ when evaluated." ;; modules. (list 'unquote (list '@ mod (variable-name pkg mod))) - out)))) + out))) + ((label (? origin? origin)) + (list label (list 'unquote (source->code origin #f))))) lsts))) (let ((name (package-name package)) diff --git a/tests/print.scm b/tests/print.scm index 3386590d3a..ad19f4573a 100644 --- a/tests/print.scm +++ b/tests/print.scm @@ -67,6 +67,30 @@ (description "This is a dummy package.") (license license:gpl3+))) +(define-with-source pkg-with-origin-input pkg-with-origin-input-source + (package + (name "test") + (version "1.2.3") + (source (origin + (method url-fetch) + (uri (string-append "file:///tmp/test-" + version ".tar.gz")) + (sha256 + (base32 + "070pwb7brdcn1mfvplkd56vjc7lbz4iznzkqvfsakvgbv68k71ah")))) + (build-system (@ (guix build-system gnu) gnu-build-system)) + (inputs + `(("o" ,(origin + (method url-fetch) + (uri "http://example.org/somefile.txt") + (sha256 + (base32 + "0000000000000000000000000000000000000000000000000000")))))) + (home-page "http://gnu.org") + (synopsis "Dummy") + (description "This is a dummy package.") + (license license:gpl3+))) + (test-equal "simple package" `(define-public test ,pkg-source) (package->code pkg)) @@ -75,4 +99,8 @@ `(define-public test ,pkg-with-inputs-source) (package->code pkg-with-inputs)) +(test-equal "package with origin input" + `(define-public test ,pkg-with-origin-input-source) + (package->code pkg-with-origin-input)) + (test-end "print") -- cgit v1.2.3 From b3240ae846cb1ace2322a68eca3497f11d0be6f1 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 29 Oct 2021 22:23:21 +0200 Subject: import: print: Correctly handle URI lists. * guix/import/print.scm (package->code)[factorized-uri-code]: New procedure. [source->code]: Use it, and factorize URI when it's a list. * tests/print.scm (pkg-with-origin-input): Check origin URI to a list. --- guix/import/print.scm | 15 ++++++++++++--- tests/print.scm | 6 ++++-- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'tests') diff --git a/guix/import/print.scm b/guix/import/print.scm index 8acf5d52f6..4e65d18bc3 100644 --- a/guix/import/print.scm +++ b/guix/import/print.scm @@ -25,6 +25,7 @@ #:use-module (guix build-system) #:use-module (gnu packages) #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) #:use-module (guix import utils) #:use-module (ice-9 control) #:use-module (ice-9 match) @@ -72,6 +73,11 @@ when evaluated." (file-type (quote ,(search-path-specification-file-type spec))) (file-pattern ,(search-path-specification-file-pattern spec)))) + (define (factorized-uri-code uri version) + (match (factorize-uri uri version) + ((? string? uri) uri) + ((factorized ...) `(string-append ,@factorized)))) + (define (source->code source version) (let ((uri (origin-uri source)) (method (origin-method source)) @@ -90,9 +96,12 @@ when evaluated." (guix svn-download))) (procedure-name method))) (uri ,(if version - `(string-append ,@(match (factorize-uri uri version) - ((? string? uri) (list uri)) - (factorized factorized))) + (match uri + ((? string? uri) + (factorized-uri-code uri version)) + ((lst ...) + `(list + ,@(map (cut factorized-uri-code <> version) uri)))) uri)) ,(if (equal? (content-hash-algorithm hash) 'sha256) `(sha256 (base32 ,(bytevector->nix-base32-string diff --git a/tests/print.scm b/tests/print.scm index ad19f4573a..7f4c8ccdd1 100644 --- a/tests/print.scm +++ b/tests/print.scm @@ -73,8 +73,10 @@ (version "1.2.3") (source (origin (method url-fetch) - (uri (string-append "file:///tmp/test-" - version ".tar.gz")) + (uri (list (string-append "file:///tmp/test-" + version ".tar.gz") + (string-append "http://example.org/test-" + version ".tar.gz"))) (sha256 (base32 "070pwb7brdcn1mfvplkd56vjc7lbz4iznzkqvfsakvgbv68k71ah")))) -- cgit v1.2.3 From b2ed40c29f578d46d42cb1c5e99bd797cea3aba0 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 29 Oct 2021 22:29:05 +0200 Subject: import: print: Handle patches that are origins. * guix/import/print.scm (package->code)[source->code]: Handle patches that are origins. * tests/print.scm (pkg-with-origin-input): Add 'patches' field. (pkg-with-origin-patch, pkg-with-origin-patch-source): New variables. ("package with origin patch"): New test. --- guix/import/print.scm | 13 +++++++++++-- tests/print.scm | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 3 deletions(-) (limited to 'tests') diff --git a/guix/import/print.scm b/guix/import/print.scm index 4e65d18bc3..e04a6647b4 100644 --- a/guix/import/print.scm +++ b/guix/import/print.scm @@ -112,8 +112,17 @@ when evaluated." ;; FIXME: in order to be able to throw away the directory prefix, ;; we just assume that the patch files can be found with ;; "search-patches". - ,@(if (null? patches) '() - `((patches (search-patches ,@(map basename patches)))))))) + ,@(cond ((null? patches) + '()) + ((every string? patches) + `((patches (search-patches ,@(map basename patches))))) + (else + `((patches (list ,@(map (match-lambda + ((? string? file) + `(search-patch ,file)) + ((? origin? origin) + (source->code origin #f))) + patches))))))))) (define (package-lists->code lsts) (list 'quasiquote diff --git a/tests/print.scm b/tests/print.scm index 7f4c8ccdd1..ff0db469ab 100644 --- a/tests/print.scm +++ b/tests/print.scm @@ -22,6 +22,7 @@ #:use-module (guix download) #:use-module (guix packages) #:use-module ((guix licenses) #:prefix license:) + #:use-module ((gnu packages) #:select (search-patches)) #:use-module (srfi srfi-64)) (define-syntax-rule (define-with-source object source expr) @@ -79,7 +80,9 @@ version ".tar.gz"))) (sha256 (base32 - "070pwb7brdcn1mfvplkd56vjc7lbz4iznzkqvfsakvgbv68k71ah")))) + "070pwb7brdcn1mfvplkd56vjc7lbz4iznzkqvfsakvgbv68k71ah")) + (patches (search-patches "guile-linux-syscalls.patch" + "guile-relocatable.patch")))) (build-system (@ (guix build-system gnu) gnu-build-system)) (inputs `(("o" ,(origin @@ -93,6 +96,30 @@ (description "This is a dummy package.") (license license:gpl3+))) +(define-with-source pkg-with-origin-patch pkg-with-origin-patch-source + (package + (name "test") + (version "1.2.3") + (source (origin + (method url-fetch) + (uri (string-append "file:///tmp/test-" + version ".tar.gz")) + (sha256 + (base32 + "070pwb7brdcn1mfvplkd56vjc7lbz4iznzkqvfsakvgbv68k71ah")) + (patches + (list (origin + (method url-fetch) + (uri "http://example.org/x.patch") + (sha256 + (base32 + "0000000000000000000000000000000000000000000000000000"))))))) + (build-system (@ (guix build-system gnu) gnu-build-system)) + (home-page "http://gnu.org") + (synopsis "Dummy") + (description "This is a dummy package.") + (license license:gpl3+))) + (test-equal "simple package" `(define-public test ,pkg-source) (package->code pkg)) @@ -105,4 +132,8 @@ `(define-public test ,pkg-with-origin-input-source) (package->code pkg-with-origin-input)) +(test-equal "package with origin patch" + `(define-public test ,pkg-with-origin-patch-source) + (package->code pkg-with-origin-patch)) + (test-end "print") -- cgit v1.2.3 From 3756ce32674139376bcf11dac96bc562582088f7 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Thu, 11 Nov 2021 00:10:44 +0100 Subject: import: print: Replace packages and origins in 'arguments'. * guix/import/print.scm (package->code)[variable-reference] [object->code]: New procedures. [package-lists->code]: Rewrite in terms of 'object->code'. Pass the 'arguments' field through 'object->code'. * tests/print.scm (pkg-with-arguments, pkg-with-arguments-source): New variables. ("package with arguments"): New test. --- guix/import/print.scm | 50 ++++++++++++++++++++++++++++++-------------------- tests/print.scm | 23 +++++++++++++++++++++++ 2 files changed, 53 insertions(+), 20 deletions(-) (limited to 'tests') diff --git a/guix/import/print.scm b/guix/import/print.scm index e04a6647b4..767b0528d5 100644 --- a/guix/import/print.scm +++ b/guix/import/print.scm @@ -1,5 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2017, 2020 Ricardo Wurmus +;;; Copyright © 2021 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -31,9 +32,6 @@ #:use-module (ice-9 match) #:export (package->code)) -;; FIXME: the quasiquoted arguments field may contain embedded package -;; objects, e.g. in #:disallowed-references; they will just be printed with -;; their usual # representation, not as variable names. (define (package->code package) "Return an S-expression representing the source code that produces PACKAGE when evaluated." @@ -124,23 +122,34 @@ when evaluated." (source->code origin #f))) patches))))))))) + (define (variable-reference module name) + ;; FIXME: using '@ certainly isn't pretty, but it avoids having to import + ;; the individual package modules. + (list '@ module name)) + + (define (object->code obj quoted?) + (match obj + ((? package? package) + (let* ((module (package-module-name package)) + (name (variable-name package module))) + (if quoted? + (list 'unquote (variable-reference module name)) + (variable-reference module name)))) + ((? origin? origin) + (let ((code (source->code origin #f))) + (if quoted? + (list 'unquote code) + code))) + ((lst ...) + (let ((lst (map (cut object->code <> #t) lst))) + (if quoted? + lst + (list 'quasiquote lst)))) + (obj + obj))) + (define (package-lists->code lsts) - (list 'quasiquote - (map (match-lambda - ((? symbol? s) - (list (symbol->string s) (list 'unquote s))) - ((label (? package? pkg) . out) - (let ((mod (package-module-name pkg))) - (cons* label - ;; FIXME: using '@ certainly isn't pretty, but it - ;; avoids having to import the individual package - ;; modules. - (list 'unquote - (list '@ mod (variable-name pkg mod))) - out))) - ((label (? origin? origin)) - (list label (list 'unquote (source->code origin #f))))) - lsts))) + (list 'quasiquote (object->code lsts #t))) (let ((name (package-name package)) (version (package-version package)) @@ -176,7 +185,8 @@ when evaluated." '-build-system))) ,@(match arguments (() '()) - (args `((arguments ,(list 'quasiquote args))))) + (_ `((arguments + ,(list 'quasiquote (object->code arguments #t)))))) ,@(match outputs (("out") '()) (outs `((outputs (list ,@outs))))) diff --git a/tests/print.scm b/tests/print.scm index ff0db469ab..1527251b01 100644 --- a/tests/print.scm +++ b/tests/print.scm @@ -120,6 +120,25 @@ (description "This is a dummy package.") (license license:gpl3+))) +(define-with-source pkg-with-arguments pkg-with-arguments-source + (package + (name "test") + (version "1.2.3") + (source (origin + (method url-fetch) + (uri (string-append "file:///tmp/test-" + version ".tar.gz")) + (sha256 + (base32 + "070pwb7brdcn1mfvplkd56vjc7lbz4iznzkqvfsakvgbv68k71ah")))) + (build-system (@ (guix build-system gnu) gnu-build-system)) + (arguments + `(#:disallowed-references (,(@ (gnu packages base) coreutils)))) + (home-page "http://gnu.org") + (synopsis "Dummy") + (description "This is a dummy package.") + (license license:gpl3+))) + (test-equal "simple package" `(define-public test ,pkg-source) (package->code pkg)) @@ -136,4 +155,8 @@ `(define-public test ,pkg-with-origin-patch-source) (package->code pkg-with-origin-patch)) +(test-equal "package with arguments" + `(define-public test ,pkg-with-arguments-source) + (package->code pkg-with-arguments)) + (test-end "print") -- cgit v1.2.3 From 114005bea61efcd5f7d768e2d6503e873f654c16 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Sat, 13 Nov 2021 23:02:18 +0100 Subject: tests: Adjust tests/egg.scm to latest API changes. This is a followup to b999c80c2e71bd4b3f26a18a321b7e7e7b580103. * tests/egg.scm (eval-test-with-egg-file): Pass 'version' argument to 'egg->guix-package'. --- tests/egg.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tests') diff --git a/tests/egg.scm b/tests/egg.scm index 0884d8d429..99dd0a3fc7 100644 --- a/tests/egg.scm +++ b/tests/egg.scm @@ -72,7 +72,7 @@ (call-with-output-file egg-file (lambda (port) (write egg-test port))) - (matcher (egg->guix-package egg-name + (matcher (egg->guix-package egg-name #f #:file egg-file #:source (plain-file (string-append egg-name "-egg") -- cgit v1.2.3 From f39397b21041fe418247239f27473aff49a203c9 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Sat, 13 Nov 2021 16:11:25 +0100 Subject: tests: Factorize 'file=?'. * guix/tests.scm (file=?): Add optional 'stat' parameter. Add fast patch comparing inode numbers. * tests/gexp.scm ("imported-files with file-like objects"): Remove 'file=?' procedure and use the one from (guix tests). --- guix/tests.scm | 30 +++++++++++++++++------------- tests/gexp.scm | 11 +++-------- 2 files changed, 20 insertions(+), 21 deletions(-) (limited to 'tests') diff --git a/guix/tests.scm b/guix/tests.scm index fc3d521163..e1c194340c 100644 --- a/guix/tests.scm +++ b/guix/tests.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès +;;; Copyright © 2013-2021 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -182,18 +182,22 @@ too expensive to build entirely in the test store." (loop (1+ i))) bv)))) -(define (file=? a b) - "Return true if files A and B have the same type and same content." - (and (eq? (stat:type (lstat a)) (stat:type (lstat b))) - (case (stat:type (lstat a)) - ((regular) - (equal? - (call-with-input-file a get-bytevector-all) - (call-with-input-file b get-bytevector-all))) - ((symlink) - (string=? (readlink a) (readlink b))) - (else - (error "what?" (lstat a)))))) +(define* (file=? a b #:optional (stat lstat)) + "Return true if files A and B have the same type and same content. Call +STAT to obtain file metadata." + (let ((sta (stat a)) (stb (stat b))) + (and (eq? (stat:type sta) (stat:type stb)) + (case (stat:type sta) + ((regular) + (or (and (= (stat:ino sta) (stat:ino stb)) + (= (stat:dev sta) (stat:dev stb))) + (equal? + (call-with-input-file a get-bytevector-all) + (call-with-input-file b get-bytevector-all)))) + ((symlink) + (string=? (readlink a) (readlink b))) + (else + (error "what?" (stat a))))))) (define (canonical-file? file) "Return #t if FILE is in the store, is read-only, and its mtime is 1." diff --git a/tests/gexp.scm b/tests/gexp.scm index 39a47d4e8c..0758a49f5f 100644 --- a/tests/gexp.scm +++ b/tests/gexp.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès +;;; Copyright © 2014-2021 Ludovic Courtès ;;; Copyright © 2021 Maxime Devos ;;; ;;; This file is part of GNU Guix. @@ -827,19 +827,14 @@ (files -> `(("a/b/c" . ,q-scm) ("p/q" . ,plain))) (drv (imported-files files))) - (define (file=? file1 file2) - ;; Assume deduplication is in place. - (= (stat:ino (stat file1)) - (stat:ino (stat file2)))) - (mbegin %store-monad (built-derivations (list (pk 'drv drv))) (mlet %store-monad ((dir -> (derivation->output-path drv)) (plain* (text-file "foo" "bar!")) (q-scm* (interned-file q-scm "c"))) (return - (and (file=? (string-append dir "/a/b/c") q-scm*) - (file=? (string-append dir "/p/q") plain*))))))) + (and (file=? (string-append dir "/a/b/c") q-scm* stat) + (file=? (string-append dir "/p/q") plain* stat))))))) (test-equal "gexp-modules & ungexp" '((bar) (foo)) -- cgit v1.2.3 From 472a0e82a52a3d5d841e1dfad6b13e26082a5750 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Sat, 13 Nov 2021 21:47:15 +0100 Subject: daemon: Do not deduplicate files smaller than 8 KiB. Files smaller than 8 KiB typically represent ~70% of the entries in /gnu/store/.links but only contribute to ~4% of the space savings afforded by deduplication. Not considering these files for deduplication speeds up file insertion in the store and, more importantly, leaves 'removeUnusedLinks' with fewer entries to traverse, thereby speeding it up proportionally. Partly fixes . * config-daemon.ac: Remove symlink hard link check and CAN_LINK_SYMLINK definition. * guix/store/deduplication.scm (%deduplication-minimum-size): New variable. (deduplicate)[loop]: Do not recurse when FILE's size is below %DEDUPLICATION-MINIMUM-SIZE. (dump-port): New procedure. (dump-file/deduplicate)[hash]: Turn into... [dump-and-compute-hash]: ... this thunk. Call 'deduplicate' only when SIZE is greater than %DEDUPLICATION-MINIMUM-SIZE; otherwise call 'dump-port'. * nix/libstore/gc.cc (LocalStore::removeUnusedLinks): Drop files where st.st_size < deduplicationMinSize. * nix/libstore/local-store.hh (deduplicationMinSize): New declaration. * nix/libstore/optimise-store.cc (deduplicationMinSize): New variable. (LocalStore::optimisePath_): Return when PATH is a symlink or smaller than 'deduplicationMinSize'. * tests/derivations.scm ("identical files are deduplicated"): Produce files bigger than %DEDUPLICATION-MINIMUM-SIZE. * tests/nar.scm ("restore-file-set with directories (signed, valid)"): Likewise. * tests/store-deduplication.scm ("deduplicate, below %deduplication-minimum-size"): New test. ("deduplicate", "deduplicate, ENOSPC"): Produce files bigger than %DEDUPLICATION-MINIMUM-SIZE. * tests/store.scm ("substitute, deduplication"): Likewise. --- config-daemon.ac | 11 ------- guix/store/deduplication.scm | 69 ++++++++++++++++++++++++++++++++++++------ nix/libstore/gc.cc | 4 ++- nix/libstore/local-store.hh | 3 ++ nix/libstore/optimise-store.cc | 15 +++++---- tests/derivations.scm | 14 ++++++--- tests/nar.scm | 7 +++-- tests/store-deduplication.scm | 41 +++++++++++++++++++++---- tests/store.scm | 4 ++- 9 files changed, 126 insertions(+), 42 deletions(-) (limited to 'tests') diff --git a/config-daemon.ac b/config-daemon.ac index 5ddc740600..86306effe1 100644 --- a/config-daemon.ac +++ b/config-daemon.ac @@ -94,17 +94,6 @@ if test "x$guix_build_daemon" = "xyes"; then AC_CHECK_FUNCS([lutimes lchown posix_fallocate sched_setaffinity \ statvfs nanosleep strsignal statx]) - dnl Check whether the store optimiser can optimise symlinks. - AC_MSG_CHECKING([whether it is possible to create a link to a symlink]) - ln -s bla tmp_link - if ln tmp_link tmp_link2 2> /dev/null; then - AC_MSG_RESULT(yes) - AC_DEFINE(CAN_LINK_SYMLINK, 1, [Whether link() works on symlinks.]) - else - AC_MSG_RESULT(no) - fi - rm -f tmp_link tmp_link2 - dnl Check for . AC_LANG_PUSH(C++) AC_CHECK_HEADERS([locale]) diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm index cd9660174c..370df4a74c 100644 --- a/guix/store/deduplication.scm +++ b/guix/store/deduplication.scm @@ -1,6 +1,6 @@ ;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2017 Caleb Ristvedt -;;; Copyright © 2018, 2019, 2020 Ludovic Courtès +;;; Copyright © 2018-2021 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -22,12 +22,13 @@ (define-module (guix store deduplication) #:use-module (gcrypt hash) - #:use-module (guix build utils) + #:use-module ((guix build utils) #:hide (dump-port)) #:use-module (guix build syscalls) #:use-module (guix base32) #:use-module (srfi srfi-11) #:use-module (srfi srfi-34) #:use-module (srfi srfi-35) + #:use-module (rnrs bytevectors) #:use-module (rnrs io ports) #:use-module (ice-9 ftw) #:use-module (ice-9 match) @@ -37,6 +38,31 @@ dump-file/deduplicate copy-file/deduplicate)) +;; TODO: Remove once 'dump-port' in (guix build utils) has an optional 'len' +;; parameter. +(define* (dump-port in out + #:optional len + #:key (buffer-size 16384)) + "Read LEN bytes from IN (or as much as possible if LEN is #f) and write it +to OUT, using chunks of BUFFER-SIZE bytes." + (define buffer + (make-bytevector buffer-size)) + + (let loop ((total 0) + (bytes (get-bytevector-n! in buffer 0 + (if len + (min len buffer-size) + buffer-size)))) + (or (eof-object? bytes) + (and len (= total len)) + (let ((total (+ total bytes))) + (put-bytevector out buffer 0 bytes) + (loop total + (get-bytevector-n! in buffer 0 + (if len + (min (- len total) buffer-size) + buffer-size))))))) + (define (nar-sha256 file) "Gives the sha256 hash of a file and the size of the file in nar form." (let-values (((port get-hash) (open-sha256-port))) @@ -127,11 +153,27 @@ Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system." (unless (= EMLINK (system-error-errno args)) (apply throw args))))))) +(define %deduplication-minimum-size + ;; Size below which files are not deduplicated. This avoids adding too many + ;; entries to '.links', which would slow down 'removeUnusedLinks' while + ;; saving little space. Keep in sync with optimize-store.cc. + 8192) + (define* (deduplicate path hash #:key (store (%store-directory))) "Check if a store item with sha256 hash HASH already exists. If so, replace PATH with a hardlink to the already-existing one. If not, register PATH so that future duplicates can hardlink to it. PATH is assumed to be under STORE." + ;; Lightweight promises. + (define-syntax-rule (delay exp) + (let ((value #f)) + (lambda () + (unless value + (set! value exp)) + value))) + (define-syntax-rule (force promise) + (promise)) + (define links-directory (string-append store "/.links")) @@ -144,13 +186,18 @@ under STORE." ((file . properties) (unless (member file '("." "..")) (let* ((file (string-append path "/" file)) + (st (delay (lstat file))) (type (match (assoc-ref properties 'type) ((or 'unknown #f) - (stat:type (lstat file))) + (stat:type (force st))) (type type)))) - (loop file type - (and (not (eq? 'directory type)) - (nar-sha256 file))))))) + (when (or (eq? 'directory type) + (and (eq? 'regular type) + (>= (stat:size (force st)) + %deduplication-minimum-size))) + (loop file type + (and (not (eq? 'directory type)) + (nar-sha256 file)))))))) (scandir* path)) (let ((link-file (string-append links-directory "/" (bytevector->nix-base32-string hash)))) @@ -222,9 +269,9 @@ OUTPUT as it goes." This procedure is suitable as a #:dump-file argument to 'restore-file'. When used that way, it deduplicates files on the fly as they are restored, thereby -removing the need to a deduplication pass that would re-read all the files +removing the need for a deduplication pass that would re-read all the files down the road." - (define hash + (define (dump-and-compute-hash) (call-with-output-file file (lambda (output) (let-values (((hash-port get-hash) @@ -236,7 +283,11 @@ down the road." (close-port hash-port) (get-hash))))) - (deduplicate file hash #:store store)) + (if (>= size %deduplication-minimum-size) + (deduplicate file (dump-and-compute-hash) #:store store) + (call-with-output-file file + (lambda (output) + (dump-port input output size))))) (define* (copy-file/deduplicate source target #:key (store (%store-directory))) diff --git a/nix/libstore/gc.cc b/nix/libstore/gc.cc index e1d0765154..16519116e4 100644 --- a/nix/libstore/gc.cc +++ b/nix/libstore/gc.cc @@ -606,7 +606,9 @@ void LocalStore::removeUnusedLinks(const GCState & state) throw SysError(format("statting `%1%'") % path); #endif - if (st.st_nlink != 1) { + /* Drop links for files smaller than 'deduplicationMinSize', even if + they have more than one hard link. */ + if (st.st_nlink != 1 && st.st_size >= deduplicationMinSize) { actualSize += st.st_size; unsharedSize += (st.st_nlink - 1) * st.st_size; continue; diff --git a/nix/libstore/local-store.hh b/nix/libstore/local-store.hh index 9ba37219da..20d3c3c893 100644 --- a/nix/libstore/local-store.hh +++ b/nix/libstore/local-store.hh @@ -292,4 +292,7 @@ void canonicaliseTimestampAndPermissions(const Path & path); MakeError(PathInUse, Error); +/* Size below which a file is not considered for deduplication. */ +extern const size_t deduplicationMinSize; + } diff --git a/nix/libstore/optimise-store.cc b/nix/libstore/optimise-store.cc index eb303ab4c3..9fd6f3cb35 100644 --- a/nix/libstore/optimise-store.cc +++ b/nix/libstore/optimise-store.cc @@ -15,6 +15,9 @@ namespace nix { +/* Any file smaller than this is not considered for deduplication. + Keep in sync with (guix store deduplication). */ +const size_t deduplicationMinSize = 8192; static void makeWritable(const Path & path) { @@ -105,12 +108,12 @@ void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path, InodeHa return; } - /* We can hard link regular files and maybe symlinks. */ - if (!S_ISREG(st.st_mode) -#if CAN_LINK_SYMLINK - && !S_ISLNK(st.st_mode) -#endif - ) return; + /* We can hard link regular files (and maybe symlinks), but do that only + for files larger than some threshold. This avoids adding too many + entries to '.links', which would slow down 'removeUnusedLinks' while + saving little space. */ + if (!S_ISREG(st.st_mode) || ((size_t) st.st_size) < deduplicationMinSize) + return; /* Sometimes SNAFUs can cause files in the store to be modified, in particular when running programs as root under diff --git a/tests/derivations.scm b/tests/derivations.scm index cd165d1be6..0775719ea3 100644 --- a/tests/derivations.scm +++ b/tests/derivations.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès +;;; Copyright © 2012-2021 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -170,11 +170,15 @@ #f)))) (test-assert "identical files are deduplicated" - (let* ((build1 (add-text-to-store %store "one.sh" - "echo hello, world > \"$out\"\n" + ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE. + (let* ((data (make-string 9000 #\a)) + (build1 (add-text-to-store %store "one.sh" + (string-append "echo -n " data + " > \"$out\"\n") '())) (build2 (add-text-to-store %store "two.sh" - "# Hey!\necho hello, world > \"$out\"\n" + (string-append "# Hey!\necho -n " + data " > \"$out\"\n") '())) (drv1 (derivation %store "foo" %bash `(,build1) @@ -187,7 +191,7 @@ (file2 (derivation->output-path drv2))) (and (valid-path? %store file1) (valid-path? %store file2) (string=? (call-with-input-file file1 get-string-all) - "hello, world\n") + data) (= (stat:ino (lstat file1)) (stat:ino (lstat file2)))))))) diff --git a/tests/nar.scm b/tests/nar.scm index ba4881caaa..98752f2088 100644 --- a/tests/nar.scm +++ b/tests/nar.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès +;;; Copyright © 2012-2021 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -486,8 +486,9 @@ ;; their mtime and permissions were not reset. Ensure that this bug is ;; gone. (with-store store - (let* ((text1 (random-text)) - (text2 (random-text)) + ;; Note: TEXT1 and TEXT2 must be longer than %DEDUPLICATION-MINIMUM-SIZE. + (let* ((text1 (string-concatenate (make-list 200 (random-text)))) + (text2 (string-concatenate (make-list 200 (random-text)))) (tree `("tree" directory ("a" regular (data ,text1)) ("b" directory diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm index b1c2d93bbd..2950fbc1a3 100644 --- a/tests/store-deduplication.scm +++ b/tests/store-deduplication.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2018, 2020 Ludovic Courtès +;;; Copyright © 2018, 2020-2021 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -30,13 +30,40 @@ (test-begin "store-deduplication") +(test-equal "deduplicate, below %deduplication-minimum-size" + (list #t (make-list 5 1)) + + (call-with-temporary-directory + (lambda (store) + ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE. + (let ((data "Hello, world!") + (identical (map (lambda (n) + (string-append store "/" (number->string n) + "/a/b/c")) + (iota 5)))) + (for-each (lambda (file) + (mkdir-p (dirname file)) + (call-with-output-file file + (lambda (port) + (put-bytevector port (string->utf8 data))))) + identical) + + (deduplicate store (nar-sha256 store) #:store store) + + ;; (system (string-append "ls -lRia " store)) + (list (= (length (delete-duplicates + (map (compose stat:ino stat) identical))) + (length identical)) + (map (compose stat:nlink stat) identical)))))) + (test-equal "deduplicate" (cons* #t #f ;inode comparisons 2 (make-list 5 6)) ;'nlink' values (call-with-temporary-directory (lambda (store) - (let ((data (string->utf8 "Hello, world!")) + ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE. + (let ((data (string-concatenate (make-list 1000 "Hello, world!"))) (identical (map (lambda (n) (string-append store "/" (number->string n) "/a/b/c")) @@ -46,7 +73,7 @@ (mkdir-p (dirname file)) (call-with-output-file file (lambda (port) - (put-bytevector port data)))) + (put-bytevector port (string->utf8 data))))) identical) ;; Make the parent of IDENTICAL read-only. This should not prevent ;; deduplication from inserting its hard link. @@ -54,7 +81,7 @@ (call-with-output-file unique (lambda (port) - (put-bytevector port (string->utf8 "This is unique.")))) + (put-bytevector port (string->utf8 (string-reverse data))))) (deduplicate store (nar-sha256 store) #:store store) @@ -77,8 +104,10 @@ (lambda (store) (let ((true-link link) (links 0) - (data1 (string->utf8 "Hello, world!")) - (data2 (string->utf8 "Hi, world!")) + (data1 (string->utf8 + (string-concatenate (make-list 1000 "Hello, world!")))) + (data2 (string->utf8 + (string-concatenate (make-list 1000 "Hi, world!")))) (identical (map (lambda (n) (string-append store "/" (number->string n) "/a/b/c")) diff --git a/tests/store.scm b/tests/store.scm index 2150a0048c..5c9f651d6c 100644 --- a/tests/store.scm +++ b/tests/store.scm @@ -759,7 +759,9 @@ (test-assert "substitute, deduplication" (with-store s - (let* ((c (random-text)) ; contents of the output + ;; Note: C must be longer than %DEDUPLICATION-MINIMUM-SIZE. + (let* ((c (string-concatenate + (make-list 200 (random-text)))) ; contents of the output (g (package-derivation s %bootstrap-guile)) (d1 (build-expression->derivation s "substitute-me" `(begin ,c (exit 1)) -- cgit v1.2.3 From f35976584cce09e3e857f75caa812daedc8cc07e Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Tue, 16 Nov 2021 13:31:40 +0100 Subject: home: Adjust 'guix home import' test for shell aliases. This is a followup to 4b96998292442ec03024481c911d88f86c7c36b5. * tests/home-import.scm (match-home-environment-bash-service): Match the 'aliases' field. --- tests/home-import.scm | 1 + 1 file changed, 1 insertion(+) (limited to 'tests') diff --git a/tests/home-import.scm b/tests/home-import.scm index abd3cec43d..0bcdf8a469 100644 --- a/tests/home-import.scm +++ b/tests/home-import.scm @@ -151,6 +151,7 @@ corresponding file." ('list ('service 'home-bash-service-type ('home-bash-configuration + ('aliases ('quote ())) ('bashrc ('list ('local-file "/tmp/guix-config/.bashrc" "bashrc")))))))))) -- cgit v1.2.3