From 602994847b748937b6fa39a7b819429857cdd8d3 Mon Sep 17 00:00:00 2001 From: Tobias Geerinckx-Rice Date: Sat, 15 May 2021 15:29:40 +0200 Subject: file-systems: Support forced checks & repairs. * gnu/build/file-systems.scm (check-ext2-file-system) (check-bcachefs-file-system, check-btrfs-file-system) (check-fat-file-system, check-jfs-file-system, check-f2fs-file-system) (check-ntfs-file-system, check-file-system): Take and honour new FORCE? and REPAIR arguments. Update the docstring. Adjust all callers. * gnu/system/file-systems.scm : Add new SKIP-CHECK-IF-CLEAN? and REPAIR fields. (file-system->spec, spec->file-system): Adjust accordingly. * gnu/build/linux-boot.scm (mount-root-file-system): Take new SKIP-CHECK-IF-CLEAN? and REPAIR keyword arguments. Thread them through to CHECK-FILE-SYSTEM. * doc/guix.texi (File Systems): Document both new options. --- doc/guix.texi | 34 ++++++- gnu/build/file-systems.scm | 209 +++++++++++++++++++++++++++++++------------- gnu/build/linux-boot.scm | 19 +++- gnu/system/file-systems.scm | 20 ++++- 4 files changed, 212 insertions(+), 70 deletions(-) diff --git a/doc/guix.texi b/doc/guix.texi index b15a45a977..a62578be26 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -14187,8 +14187,38 @@ initial RAM disk (initrd) is loaded. This is always the case, for instance, for the root file system. @item @code{check?} (default: @code{#t}) -This Boolean indicates whether the file system needs to be checked for -errors before being mounted. +This Boolean indicates whether the file system should be checked for +errors before being mounted. How and when this happens can be further +adjusted with the following options. + +@item @code{skip-check-if-clean?} (default: @code{#t}) +When true, this Boolean indicates that a file system check triggered +by @code{check?} may exit early if the file system is marked as +``clean'', meaning that it was previously correctly unmounted and +should not contain errors. + +Setting this to false will always force a full consistency check when +@code{check?} is true. This may take a very long time and is not +recommended on healthy systems---in fact, it may reduce reliability! + +Conversely, some primitive file systems like @code{fat} do not keep +track of clean shutdowns and will perform a full scan regardless of the +value of this option. + +@item @code{repair} (default: @code{'preen}) +When @code{check?} finds errors, it can (try to) repair them and +continue booting. This option controls when and how to do so. + +If false, try not to modify the file system at all. Checking certain +file systems like @code{jfs} may still write to the device to replay +the journal. No repairs will be attempted. + +If @code{#t}, try to repair any errors found and assume ``yes'' to +all questions. This will fix the most errors, but may be risky. + +If @code{'preen}, repair only errors that are safe to fix without +human interaction. What that means is left up to the developers of +each file system and may be equivalent to ``none'' or ``all''. @item @code{create-mount-point?} (default: @code{#f}) When true, the mount point is created if it does not exist yet. diff --git a/gnu/build/file-systems.scm b/gnu/build/file-systems.scm index 4eeb81cf26..a54127e888 100644 --- a/gnu/build/file-systems.scm +++ b/gnu/build/file-systems.scm @@ -170,10 +170,19 @@ if DEVICE does not contain an ext2 file system." #f if SBLOCK has no volume name." (null-terminated-latin1->string (sub-bytevector sblock 120 16))) -(define (check-ext2-file-system device) - "Return the health of an ext2 file system on DEVICE." +(define (check-ext2-file-system device force? repair) + "Return the health of an unmounted ext2 file system on DEVICE. If FORCE? is +true, check the file system even if it's marked as clean. If REPAIR is false, +do not write to the file system to fix errors. If it's #t, fix all +errors. Otherwise, fix only those considered safe to repair automatically." (match (status:exit-val - (system* "e2fsck" "-v" "-p" "-C" "0" device)) + (apply system* `("e2fsck" "-v" "-C" "0" + ,@(if force? '("-f") '()) + ,@(match repair + (#f '("-n")) + (#t '("-y")) + (_ '("-p"))) + ,device))) (0 'pass) (1 'errors-corrected) (2 'reboot-required) @@ -260,15 +269,23 @@ bytevector." #f if SBLOCK has no volume name." (null-terminated-latin1->string (sub-bytevector sblock 72 32))) -(define (check-bcachefs-file-system device) - "Return the health of a bcachefs file system on DEVICE." +(define (check-bcachefs-file-system device force? repair) + "Return the health of an unmounted bcachefs file system on DEVICE. If FORCE? +is true, check the file system even if it's marked as clean. If REPAIR is +false, do not write to the file system to fix errors. If it's #t, fix all +errors. Otherwise, fix only those considered safe to repair automatically." (let ((ignored-bits (logior 2)) ; DEVICE was mounted read-only (status ;; A number, or #f on abnormal termination (e.g., assertion failure). (status:exit-val - (apply system* "bcachefs" "fsck" "-p" "-v" - ;; Make each multi-device member a separate argument. - (string-split device #\:))))) + (apply system* `("bcachefs" "fsck" "-v" + ,@(if force? '("-f") '()) + ,@(match repair + (#f '("-n")) + (#t '("-y")) + (_ '("-p"))) + ;; Make each multi-device member a separate argument. + ,@(string-split device #\:)))))) (match (and=> status (cut logand <> (lognot ignored-bits))) (0 'pass) (1 'errors-corrected) @@ -304,12 +321,28 @@ if DEVICE does not contain a btrfs file system." #f if SBLOCK has no volume name." (null-terminated-latin1->string (sub-bytevector sblock 299 256))) -(define (check-btrfs-file-system device) - "Return the health of a btrfs file system on DEVICE." - (match (status:exit-val - (system* "btrfs" "device" "scan")) - (0 'pass) - (_ 'fatal-error))) +(define (check-btrfs-file-system device force? repair) + "Return the health of an unmounted btrfs file system on DEVICE. If FORCE? is +false, return 'PASS unconditionally as btrfs claims no need for off-line checks. +When FORCE? is true, do perform a real check. This is not recommended! See +@uref{https://bugzilla.redhat.com/show_bug.cgi?id=625967#c8}. If REPAIR is +false, do not write to DEVICE. If it's #t, fix any errors found. Otherwise, +fix only those considered safe to repair automatically." + ;; XXX Why make this conditional on (check? #t) at all? + (system* "btrfs" "device" "scan") ; ignore errors + (if force? + (match (status:exit-val + (apply system* `("btrfs" "check" "--progress" + ;; Btrfs's ‘--force’ is not relevant to us here. + ,@(match repair + ;; Upstream considers ALL repairs dangerous + ;; and will warn the user at run time. + (#t '("--repair")) + (_ '("--readonly"))) ; a no-op for clarity + ,device))) + (0 'pass) + (_ 'fatal-error)) + 'pass)) ;;; @@ -338,10 +371,17 @@ if DEVICE does not contain a btrfs file system." Trailing spaces are trimmed." (string-trim-right (latin1->string (sub-bytevector sblock 71 11) (lambda (c) #f)) #\space)) -(define (check-fat-file-system device) - "Return the health of a fat file system on DEVICE." +(define (check-fat-file-system device force? repair) + "Return the health of an unmounted FAT file system on DEVICE. FORCE? is +ignored: a full file system scan is always performed. If REPAIR is false, do +not write to the file system to fix errors. Otherwise, automatically fix them +using the least destructive approach." (match (status:exit-val - (system* "fsck.vfat" "-v" "-a" device)) + (apply system* `("fsck.vfat" "-v" + ,@(match repair + (#f '("-n")) + (_ '("-a"))) ; no 'safe/#t distinction + ,device))) (0 'pass) (1 'errors-corrected) (_ 'fatal-error))) @@ -463,10 +503,28 @@ if DEVICE does not contain a JFS file system." #f if SBLOCK has no volume name." (null-terminated-latin1->string (sub-bytevector sblock 152 16))) -(define (check-jfs-file-system device) - "Return the health of a JFS file system on DEVICE." +(define (check-jfs-file-system device force? repair) + "Return the health of an unmounted JFS file system on DEVICE. If FORCE? is +true, check the file system even if it's marked as clean. If REPAIR is false, +do not write to the file system to fix errors, and replay the transaction log +only if FORCE? is true. Otherwise, replay the transaction log before checking +and automatically fix found errors." (match (status:exit-val - (system* "jfs_fsck" "-p" "-v" device)) + (apply system* + `("jfs_fsck" "-v" + ;; The ‘LEVEL’ logic is convoluted. To quote fsck/xchkdsk.c + ;; (‘-p’, ‘-a’, and ‘-r’ are aliases in every way): + ;; “If -f was chosen, have it override [-p] by [forcing] a + ;; check regardless of the outcome after the log is + ;; replayed”. + ;; “If -n is specified by itself, don't replay the journal. + ;; If -n is specified with [-p], replay the journal but + ;; don't make any other changes”. + ,@(if force? '("-f") '()) + ,@(match repair + (#f '("-n")) + (_ '("-p"))) ; no 'safe/#t distinction + ,device))) (0 'pass) (1 'errors-corrected) (2 'reboot-required) @@ -517,12 +575,22 @@ if DEVICE does not contain an F2FS file system." (sub-bytevector sblock (- (+ #x470 12) #x400) 512) %f2fs-endianness)) -(define (check-f2fs-file-system device) - "Return the health of a F2FS file system on DEVICE." +(define (check-f2fs-file-system device force? repair) + "Return the health of an unmuounted F2FS file system on DEVICE. If FORCE? is +true, check the file system even if it's marked as clean. If either FORCE? or +REPAIR are true, automatically fix found errors." + ;; There's no ‘-n’ equivalent (‘--dry-run’ does not disable writes). + ;; ’-y’ is an alias of ‘-f’. The man page is bad: read main.c. + (when (and force? (not repair)) + (format (current-error-port) + "warning: forced check of F2FS ~a implies repairing any errors~%" + device)) (match (status:exit-val - (system* "fsck.f2fs" "-p" device)) - ;; 0 and -1 are the only two possibilities - ;; (according to the manpage) + (apply system* `("fsck.f2fs" + ,@(if force? '("-f") '()) + ,@(if repair '("-p") '("--dry-run")) + ,device))) + ;; 0 and -1 are the only two possibilities according to the man page. (0 'pass) (_ 'fatal-error))) @@ -600,10 +668,15 @@ if DEVICE does not contain a NTFS file system." ;; in the BOOT SECTOR like the UUID, but in the MASTER FILE TABLE, which seems ;; way harder to access. -(define (check-ntfs-file-system device) - "Return the health of a NTFS file system on DEVICE." +(define (check-ntfs-file-system device force? repair) + "Return the health of an unmounted NTFS file system on DEVICE. FORCE? is +ignored: a full check is always performed. Repair is not possible: if REPAIR is +true and the volume has been repaired by an external tool, clear the volume +dirty flag to indicate that it's now safe to mount." (match (status:exit-val - (system* "ntfsfix" device)) + (apply system* `("ntfsfix" + ,@(if repair '("--clear-dirty") '("--no-action")) + ,device))) (0 'pass) (_ 'fatal-error))) @@ -816,8 +889,13 @@ containing ':/')." (uuid-bytevector spec) uuid->string)))) -(define (check-file-system device type) - "Run a file system check of TYPE on DEVICE." +(define (check-file-system device type force? repair) + "Check an unmounted TYPE file system on DEVICE. Do nothing but warn if it is +mounted. If FORCE? is true, check even when considered unnecessary. If REPAIR +is false, try not to write to DEVICE at all. If it's #t, try to fix all errors +found. Otherwise, fix only those considered safe to repair automatically. Not +all TYPEs support all values or combinations of FORCE? and REPAIR. Don't throw +an exception in such cases but perform the nearest sane action." (define check-procedure (cond ((string-prefix? "ext" type) check-ext2-file-system) @@ -831,33 +909,40 @@ containing ':/')." (else #f))) (if check-procedure - (match (check-procedure device) - ('pass - #t) - ('errors-corrected - (format (current-error-port) - "File system check corrected errors on ~a; continuing~%" - device)) - ('reboot-required - (format (current-error-port) - "File system check corrected errors on ~a; rebooting~%" - device) - (sleep 3) - (reboot)) - ('fatal-error - (format (current-error-port) "File system check on ~a failed~%" - device) - - ;; Spawn a REPL only if someone would be able to interact with it. - (when (isatty? (current-input-port)) - (format (current-error-port) "Spawning Bourne-like REPL.~%") - - ;; 'current-output-port' is typically connected to /dev/klog (in - ;; PID 1), but here we want to make sure we talk directly to the - ;; user. - (with-output-to-file "/dev/console" - (lambda () - (start-repl %bournish-language)))))) + (let ((mount (find (lambda (mount) + (string=? device (mount-source mount))) + (mounts)))) + (if mount + (format (current-error-port) + "Refusing to check ~a file system already mounted at ~a~%" + device (mount-point mount)) + (match (check-procedure device force? repair) + ('pass + #t) + ('errors-corrected + (format (current-error-port) + "File system check corrected errors on ~a; continuing~%" + device)) + ('reboot-required + (format (current-error-port) + "File system check corrected errors on ~a; rebooting~%" + device) + (sleep 3) + (reboot)) + ('fatal-error + (format (current-error-port) "File system check on ~a failed~%" + device) + + ;; Spawn a REPL only if someone might interact with it. + (when (isatty? (current-input-port)) + (format (current-error-port) "Spawning Bourne-like REPL.~%") + + ;; 'current-output-port' is typically connected to /dev/klog + ;; (in PID 1), but here we want to make sure we talk directly + ;; to the user. + (with-output-to-file "/dev/console" + (lambda () + (start-repl %bournish-language)))))))) (format (current-error-port) "No file system check procedure for ~a; skipping~%" device))) @@ -886,7 +971,11 @@ corresponds to the symbols listed in FLAGS." (() 0)))) -(define* (mount-file-system fs #:key (root "/root")) +(define* (mount-file-system fs #:key (root "/root") + (check? (file-system-check? fs)) + (skip-check-if-clean? + (file-system-skip-check-if-clean? fs)) + (repair (file-system-repair fs))) "Mount the file system described by FS, a object, under ROOT." (define (mount-nfs source mount-point type flags options) @@ -924,8 +1013,8 @@ corresponds to the symbols listed in FLAGS." (file-system-mount-flags (statfs source))) 0))) (options (file-system-options fs))) - (when (file-system-check? fs) - (check-file-system source type)) + (when check? + (check-file-system source type (not skip-check-if-clean?) repair)) (catch 'system-error (lambda () diff --git a/gnu/build/linux-boot.scm b/gnu/build/linux-boot.scm index 95d0a1fe79..ab05d1ba5e 100644 --- a/gnu/build/linux-boot.scm +++ b/gnu/build/linux-boot.scm @@ -408,12 +408,17 @@ the last argument of `mknod'." (define* (mount-root-file-system root type #:key volatile-root? (flags 0) options - check?) + check? skip-check-if-clean? repair) "Mount the root file system of type TYPE at device ROOT. If VOLATILE-ROOT? is true, mount ROOT read-only and make it an overlay with a writable tmpfs using the kernel built-in overlayfs. FLAGS and OPTIONS indicates the options to use to mount ROOT, and behave the same as for the `mount' procedure. -If CHECK? is true, first run ROOT's fsck tool (if any) non-interactively." + +If CHECK? is true, first run ROOT's fsck tool (if any) non-interactively. +If SKIP-CHECK-IF-CLEAN? is true, ask fsck to return immediately if ROOT is +marked as clean. If REPAIR is true, fsck may write to ROOT to perform repairs. +If REPAIR is also 'PREEN, ask fsck to perform only those repairs that it +considers safe." (if volatile-root? (begin @@ -435,7 +440,7 @@ If CHECK? is true, first run ROOT's fsck tool (if any) non-interactively." "lowerdir=/real-root,upperdir=/rw-root/upper,workdir=/rw-root/work")) (begin (when check? - (check-file-system root type)) + (check-file-system root type (not skip-check-if-clean?) repair)) (mount root "/root" type flags options))) ;; Make sure /root/etc/mtab is a symlink to /proc/self/mounts. @@ -612,7 +617,13 @@ upon error." #:options root-options #:check? (if root-fs (file-system-check? root-fs) - #t)) + #t) + #:skip-check-if-clean? + (and=> root-fs + file-system-skip-check-if-clean?) + #:repair (if root-fs + (file-system-repair root-fs) + 'preen)) (mount "none" "/root" "tmpfs")) ;; Mount the specified file systems. diff --git a/gnu/system/file-systems.scm b/gnu/system/file-systems.scm index b9eda80958..0350bf984f 100644 --- a/gnu/system/file-systems.scm +++ b/gnu/system/file-systems.scm @@ -3,6 +3,7 @@ ;;; Copyright © 2020 Google LLC ;;; Copyright © 2020 Jakub Kądziołka ;;; Copyright © 2020, 2021 Maxim Cournoyer +;;; Copyright © 2021 Tobias Geerinckx-Rice ;;; ;;; This file is part of GNU Guix. ;;; @@ -51,6 +52,8 @@ file-system-mount? file-system-mount-may-fail? file-system-check? + file-system-skip-check-if-clean? + file-system-repair file-system-create-mount-point? file-system-dependencies file-system-location @@ -123,6 +126,10 @@ (default #f)) (check? file-system-check? ; Boolean (default #t)) + (skip-check-if-clean? file-system-skip-check-if-clean? ; Boolean + (default #f)) + (repair file-system-repair ; symbol or #f + (default 'preen)) (create-mount-point? file-system-create-mount-point? ; Boolean (default #f)) (dependencies file-system-dependencies ; list of @@ -318,19 +325,22 @@ store--e.g., if FS is the root file system." initrd code." (match fs (($ device mount-point type flags options mount? - mount-may-fail? needed-for-boot? check?) + mount-may-fail? needed-for-boot? + check? skip-check-if-clean? repair) ;; Note: Add new fields towards the end for compatibility. (list (cond ((uuid? device) `(uuid ,(uuid-type device) ,(uuid-bytevector device))) ((file-system-label? device) `(file-system-label ,(file-system-label->string device))) (else device)) - mount-point type flags options mount-may-fail? check?)))) + mount-point type flags options mount-may-fail? + check? skip-check-if-clean? repair)))) (define (spec->file-system sexp) "Deserialize SEXP, a list, to the corresponding object." (match sexp - ((device mount-point type flags options mount-may-fail? check? + ((device mount-point type flags options mount-may-fail? + check? skip-check-if-clean? repair _ ...) ;placeholder for new fields (file-system (device (match device @@ -343,7 +353,9 @@ initrd code." (mount-point mount-point) (type type) (flags flags) (options options) (mount-may-fail? mount-may-fail?) - (check? check?))))) + (check? check?) + (skip-check-if-clean? skip-check-if-clean?) + (repair repair))))) (define (specification->file-system-mapping spec writable?) "Read the SPEC and return the corresponding . SPEC is -- cgit v1.2.3