From 4f3bdc8f21657dbda857027b3ec8754dd4c7c67b Mon Sep 17 00:00:00 2001 From: Maxim Cournoyer Date: Thu, 17 Jun 2021 01:22:35 -0400 Subject: pack: Prevent duplicate files in tar archives. Tar translate duplicate files in the archive into hard links. These can cause problems, as not every tool support them; for example dpkg doesn't. * gnu/system/file-systems.scm (reduce-directories): New procedure. (file-prefix?): Lift the restriction on file prefix. The procedure can be useful for comparing relative file names. Adjust doc. (file-name-depth): New procedure, extracted from ... (btrfs-store-subvolume-file-name): ... here. * guix/scripts/pack.scm (self-contained-tarball/builder): Use reduce-directories. * tests/file-systems.scm ("reduce-directories"): New test. --- gnu/system/file-systems.scm | 56 ++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 18 deletions(-) (limited to 'gnu') diff --git a/gnu/system/file-systems.scm b/gnu/system/file-systems.scm index 464e87cb18..fb87bfc85b 100644 --- a/gnu/system/file-systems.scm +++ b/gnu/system/file-systems.scm @@ -55,6 +55,7 @@ file-system-dependencies file-system-location + reduce-directories file-system-type-predicate btrfs-subvolume? btrfs-store-subvolume-file-name @@ -231,8 +232,8 @@ (char-set-complement (char-set #\/))) (define (file-prefix? file1 file2) - "Return #t if FILE1 denotes the name of a file that is a parent of FILE2, -where both FILE1 and FILE2 are absolute file name. For example: + "Return #t if FILE1 denotes the name of a file that is a parent of FILE2. +For example: (file-prefix? \"/gnu\" \"/gnu/store\") => #t @@ -240,19 +241,41 @@ where both FILE1 and FILE2 are absolute file name. For example: (file-prefix? \"/gn\" \"/gnu/store\") => #f " - (and (string-prefix? "/" file1) - (string-prefix? "/" file2) - (let loop ((file1 (string-tokenize file1 %not-slash)) - (file2 (string-tokenize file2 %not-slash))) - (match file1 - (() - #t) - ((head1 tail1 ...) - (match file2 - ((head2 tail2 ...) - (and (string=? head1 head2) (loop tail1 tail2))) - (() - #f))))))) + (let loop ((file1 (string-tokenize file1 %not-slash)) + (file2 (string-tokenize file2 %not-slash))) + (match file1 + (() + #t) + ((head1 tail1 ...) + (match file2 + ((head2 tail2 ...) + (and (string=? head1 head2) (loop tail1 tail2))) + (() + #f)))))) + +(define (file-name-depth file-name) + (length (string-tokenize file-name %not-slash))) + +(define (reduce-directories file-names) + "Eliminate entries in FILE-NAMES that are children of other entries in +FILE-NAMES. This is for example useful when passing a list of files to GNU +tar, which would otherwise descend into each directory passed and archive the +duplicate files as hard links, which can be undesirable." + (let* ((file-names/sorted + ;; Ascending sort by file hierarchy depth, then by file name length. + (stable-sort (delete-duplicates file-names) + (lambda (f1 f2) + (let ((depth1 (file-name-depth f1)) + (depth2 (file-name-depth f2))) + (if (= depth1 depth2) + (string< f1 f2) + (< depth1 depth2))))))) + (reverse (fold (lambda (file-name results) + (if (find (cut file-prefix? <> file-name) results) + results ;parent found -- skipping + (cons file-name results))) + '() + file-names/sorted)))) (define* (file-system-device->string device #:key uuid-type) "Return the string representations of the DEVICE field of a @@ -624,9 +647,6 @@ store is located, else #f." s (string-append "/" s))) - (define (file-name-depth file-name) - (length (string-tokenize file-name %not-slash))) - (and-let* ((btrfs-subvolume-fs (filter btrfs-subvolume? file-systems)) (btrfs-subvolume-fs* (sort btrfs-subvolume-fs -- cgit v1.2.3