From bbe66a530a014e8146d63002a5294941e935f863 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 21 Dec 2018 22:54:02 +0100 Subject: offload: Decompose 'machine-load' into simpler procedures. * guix/scripts/offload.scm (machine-load): Remove. (node-load, normalized-load): New procedures. (choose-build-machine): Call 'open-ssh-session' and 'make-node' from here; pass the node to 'node-load'. (check-machine-status): Use 'node-load' instead of 'machine-load'. Call 'disconnect!' on SESSION. --- guix/scripts/offload.scm | 92 +++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 44 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index ee5857e16b..c345d438d1 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -392,33 +392,31 @@ MACHINE." (build-requirements-features requirements) (build-machine-features machine)))) -(define (machine-load machine) - "Return the load of MACHINE, divided by the number of parallel builds -allowed on MACHINE. Return +∞ if MACHINE is unreachable." - ;; Note: This procedure is costly since it creates a new SSH session. - (match (false-if-exception (open-ssh-session machine)) - ((? session? session) - (let* ((pipe (open-remote-pipe* session OPEN_READ - "cat" "/proc/loadavg")) - (line (read-line pipe))) - (close-port pipe) - (disconnect! session) - - (if (eof-object? line) - +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded - (match (string-tokenize line) - ((one five fifteen . x) - (let* ((raw (string->number one)) - (jobs (build-machine-parallel-builds machine)) - (normalized (/ raw jobs))) - (format (current-error-port) "load on machine '~a' is ~s\ +(define (node-load node) + "Return the load on NODE. Return +∞ if NODE is misbehaving." + (let ((line (node-eval node + '(begin + (use-modules (ice-9 rdelim)) + (call-with-input-file "/proc/loadavg" + read-string))))) + (if (eof-object? line) + +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded + (match (string-tokenize line) + ((one five fifteen . x) + (string->number one)) + (x + +inf.0))))) + +(define (normalized-load machine load) + "Divide LOAD by the number of parallel builds of MACHINE." + (if (rational? load) + (let* ((jobs (build-machine-parallel-builds machine)) + (normalized (/ load jobs))) + (format (current-error-port) "load on machine '~a' is ~s\ (normalized: ~s)~%" - (build-machine-name machine) raw normalized) - normalized)) - (x - +inf.0))))) ;something's fishy about MACHINE, so avoid it - (x - +inf.0))) ;failed to connect to MACHINE, so avoid it + (build-machine-name machine) load normalized) + normalized) + load)) (define (machine-lock-file machine hint) "Return the name of MACHINE's lock file for HINT." @@ -484,21 +482,25 @@ slot (which must later be released with 'release-build-slot'), or #f and #f." (match machines+slots (((best slot) others ...) ;; Return the best machine unless it's already overloaded. - ;; Note: We call 'machine-load' only as a last resort because it is + ;; Note: We call 'node-load' only as a last resort because it is ;; too costly to call it once for every machine. - (if (< (machine-load best) 2.) - (match others - (((machines slots) ...) - ;; Release slots from the uninteresting machines. - (for-each release-build-slot slots) - - ;; The caller must keep SLOT to protect it from GC and to - ;; eventually release it. - (values best slot))) - (begin - ;; BEST is overloaded, so try the next one. - (release-build-slot slot) - (loop others)))) + (let* ((session (false-if-exception (open-ssh-session best))) + (node (and session (make-node session))) + (load (and node (normalized-load best (node-load node))))) + (when session (disconnect! session)) + (if (and node (< load 2.)) + (match others + (((machines slots) ...) + ;; Release slots from the uninteresting machines. + (for-each release-build-slot slots) + + ;; The caller must keep SLOT to protect it from GC and to + ;; eventually release it. + (values best slot))) + (begin + ;; BEST is overloaded, so try the next one. + (release-build-slot slot) + (loop others))))) (() (values #f #f)))))) @@ -689,16 +691,18 @@ machine." (info (G_ "getting status of ~a build machines defined in '~a'...~%") (length machines) machine-file) (for-each (lambda (machine) - (let* ((node (make-node (open-ssh-session machine))) - (uts (node-eval node '(uname)))) + (let* ((session (open-ssh-session machine)) + (node (make-node session)) + (uts (node-eval node '(uname))) + (load (node-load node))) + (disconnect! session) (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ host name: ~a~% normalized load: ~a~%" (build-machine-name machine) (utsname:sysname uts) (utsname:release uts) (utsname:machine uts) (utsname:nodename uts) - (parameterize ((current-error-port (%make-void-port "rw+"))) - (machine-load machine))))) + load))) machines))) -- cgit v1.2.3 From 63b0c3eaccdf1816b419632cd7fe721934d2eb27 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 21 Dec 2018 23:12:52 +0100 Subject: offload: Skip machines that are low on disk space. Fixes . * guix/scripts/offload.scm (node-free-disk-space): New procedure. (%minimum-disk-space): New variable. (choose-build-machine): Call 'node-free-disk-space' and take it into account in addition to LOAD. (check-machine-status): Display the free disk space. --- guix/scripts/offload.scm | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index c345d438d1..0bedcb402f 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -321,6 +321,13 @@ hook." (set-port-revealed! port 1) port)) +(define (node-free-disk-space node) + "Return the free disk space, in bytes, in NODE's store." + (node-eval node + `(begin + (use-modules (guix build syscalls)) + (free-disk-space ,(%store-prefix))))) + (define* (transfer-and-offload drv machine #:key (inputs '()) @@ -392,6 +399,12 @@ MACHINE." (build-requirements-features requirements) (build-machine-features machine)))) +(define %minimum-disk-space + ;; Minimum disk space required on the build machine for a build to be + ;; offloaded. This keeps us from offloading to machines that are bound to + ;; run out of disk space. + (* 100 (expt 2 20))) ;100 MiB + (define (node-load node) "Return the load on NODE. Return +∞ if NODE is misbehaving." (let ((line (node-eval node @@ -486,9 +499,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f." ;; too costly to call it once for every machine. (let* ((session (false-if-exception (open-ssh-session best))) (node (and session (make-node session))) - (load (and node (normalized-load best (node-load node))))) + (load (and node (normalized-load best (node-load node)))) + (space (and node (node-free-disk-space node)))) (when session (disconnect! session)) - (if (and node (< load 2.)) + (if (and node (< load 2.) (>= space %minimum-disk-space)) (match others (((machines slots) ...) ;; Release slots from the uninteresting machines. @@ -498,7 +512,13 @@ slot (which must later be released with 'release-build-slot'), or #f and #f." ;; eventually release it. (values best slot))) (begin - ;; BEST is overloaded, so try the next one. + ;; BEST is unsuitable, so try the next one. + (when (and space (< space %minimum-disk-space)) + (format (current-error-port) + "skipping machine '~a' because it is low \ +on disk space (~,2f MiB free)~%" + (build-machine-name best) + (/ space (expt 2 20) 1.))) (release-build-slot slot) (loop others))))) (() @@ -694,15 +714,17 @@ machine." (let* ((session (open-ssh-session machine)) (node (make-node session)) (uts (node-eval node '(uname))) - (load (node-load node))) + (load (node-load node)) + (free (node-free-disk-space node))) (disconnect! session) (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ - host name: ~a~% normalized load: ~a~%" + host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%" (build-machine-name machine) (utsname:sysname uts) (utsname:release uts) (utsname:machine uts) (utsname:nodename uts) - load))) + load + (/ free (expt 2 20) 1.)))) machines))) -- cgit v1.2.3 From b96e05aefd7a4f734cfec3b27c2d38320d43b687 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Fri, 21 Dec 2018 23:31:19 +0100 Subject: offload: Recognize build failures due to lack of disk space. Previously, if a remote build would fail due to lack of disk space, this would be considered a permanent failure and thus cached as a build failure if the local daemon runs with '--cache-failures'. * guix/scripts/offload.scm (transfer-and-offload): Upon 'nix-protocol-error?' call 'node-free-disk-space' and return 1 instead of 100 if the result if lower than 10 MiB. --- guix/scripts/offload.scm | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index 0bedcb402f..1e0ea1c4c6 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -367,9 +367,19 @@ MACHINE." (derivation-file-name drv) (build-machine-name machine) (nix-protocol-error-message c)) - ;; Use exit code 100 for a permanent build failure. The daemon - ;; interprets other non-zero codes as transient build failures. - (primitive-exit 100))) + (let* ((space (false-if-exception + (node-free-disk-space (make-node session))))) + + ;; Use exit code 100 for a permanent build failure. The daemon + ;; interprets other non-zero codes as transient build failures. + (if (and space (< space (* 10 (expt 2 20)))) + (begin + (format (current-error-port) + (G_ "build failure may have been caused by lack \ +of free disk space on '~a'~%") + (build-machine-name machine)) + (primitive-exit 1)) + (primitive-exit 100))))) (parameterize ((current-build-output-port (build-log-port))) (build-derivations store (list drv)))) -- cgit v1.2.3 From 62b845c5e2c28a360102f095548e3dc3e9cf3200 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Sat, 22 Dec 2018 14:24:49 +0100 Subject: offload: Display the normalized load in 'guix offload status' output. Fixes a regression introduced in bbe66a530a014e8146d63002a5294941e935f863 whereby the actual load (non-normalized) would be displayed. * guix/scripts/offload.scm (check-machine-status): Add call to 'normalized-load'. --- guix/scripts/offload.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index 1e0ea1c4c6..bfdaa3c011 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -733,7 +733,7 @@ machine." (utsname:sysname uts) (utsname:release uts) (utsname:machine uts) (utsname:nodename uts) - load + (normalized-load machine load) (/ free (expt 2 20) 1.)))) machines))) -- cgit v1.2.3 From ed7b44370f71126087eb953f36aad8dc4c44109f Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Mon, 24 Dec 2018 15:40:04 +0100 Subject: offload: Use (guix inferior) instead of (ssh dist node). Using inferiors and thus 'guix repl' simplifies setup on build machines (no need to worry about GUILE_LOAD_PATH etc.) Furthermore, the 'guix repl -t machine' protocol running in a remote pipe addresses several issues with the current implementation of nodes and RREPLs in Guile-SSH: fewer round trips, doesn't leave a 'guile --listen' process behind it, stateless (since a new process is started each time), more efficient (the SSH channel can be reused), more reliable (no 'pgrep', 'pkill', and shellology; see as an example.) * guix/ssh.scm (inferior-remote-eval): New procedure. (send-files): Use it instead of 'make-node' and 'node-eval'. * guix/scripts/offload.scm (node-guile-version): New procedure. (node-free-disk-space, transfer-and-offload, node-load) (choose-build-machine, assert-node-has-guix): Use 'remote-inferior' instead of 'make-node' and 'inferior-eval' instead of 'node-eval'. (assert-node-can-import, assert-node-can-export): Likewise, and add 'session' parameter. (check-machine-availability): Likewise, and add calls to 'close-inferior' and 'disconnect!'. (check-machine-status): Likewise. * doc/guix.texi (Daemon Offload Setup): Remove bit related to 'guile' in $PATH and $GUILE_LOAD_PATH; mention 'guix' alone. --- doc/guix.texi | 8 ++-- guix/scripts/offload.scm | 107 +++++++++++++++++++++++++---------------------- guix/ssh.scm | 34 ++++++++++----- 3 files changed, 83 insertions(+), 66 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/doc/guix.texi b/doc/guix.texi index f86a2885a7..c182995b2b 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -1051,13 +1051,11 @@ name, and they will be scheduled on matching build machines. @end table @end deftp -The @code{guile} command must be in the search path on the build -machines. In addition, the Guix modules must be in -@code{$GUILE_LOAD_PATH} on the build machine---you can check whether -this is the case by running: +The @command{guix} command must be in the search path on the build +machines. You can check whether this is the case by running: @example -ssh build-machine guile -c "'(use-modules (guix config))'" +ssh build-machine guix repl --version @end example There is one last thing to do once @file{machines.scm} is in place. As diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index bfdaa3c011..b472d202a9 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -23,13 +23,12 @@ #:use-module (ssh session) #:use-module (ssh channel) #:use-module (ssh popen) - #:use-module (ssh dist) - #:use-module (ssh dist node) #:use-module (ssh version) #:use-module (guix config) #:use-module (guix records) #:use-module (guix ssh) #:use-module (guix store) + #:use-module (guix inferior) #:use-module (guix derivations) #:use-module ((guix serialization) #:select (nar-error? nar-error-file)) @@ -321,12 +320,15 @@ hook." (set-port-revealed! port 1) port)) +(define (node-guile-version node) + (inferior-eval '(version) node)) + (define (node-free-disk-space node) "Return the free disk space, in bytes, in NODE's store." - (node-eval node - `(begin - (use-modules (guix build syscalls)) - (free-disk-space ,(%store-prefix))))) + (inferior-eval `(begin + (use-modules (guix build syscalls)) + (free-disk-space ,(%store-prefix))) + node)) (define* (transfer-and-offload drv machine #:key @@ -367,8 +369,12 @@ MACHINE." (derivation-file-name drv) (build-machine-name machine) (nix-protocol-error-message c)) - (let* ((space (false-if-exception - (node-free-disk-space (make-node session))))) + (let* ((inferior (false-if-exception (remote-inferior session))) + (space (false-if-exception + (node-free-disk-space inferior)))) + + (when inferior + (close-inferior inferior)) ;; Use exit code 100 for a permanent build failure. The daemon ;; interprets other non-zero codes as transient build failures. @@ -417,11 +423,11 @@ of free disk space on '~a'~%") (define (node-load node) "Return the load on NODE. Return +∞ if NODE is misbehaving." - (let ((line (node-eval node - '(begin - (use-modules (ice-9 rdelim)) - (call-with-input-file "/proc/loadavg" - read-string))))) + (let ((line (inferior-eval '(begin + (use-modules (ice-9 rdelim)) + (call-with-input-file "/proc/loadavg" + read-string)) + node))) (if (eof-object? line) +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded (match (string-tokenize line) @@ -508,9 +514,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f." ;; Note: We call 'node-load' only as a last resort because it is ;; too costly to call it once for every machine. (let* ((session (false-if-exception (open-ssh-session best))) - (node (and session (make-node session))) + (node (and session (remote-inferior session))) (load (and node (normalized-load best (node-load node)))) (space (and node (node-free-disk-space node)))) + (when node (close-inferior node)) (when session (disconnect! session)) (if (and node (< load 2.) (>= space %minimum-disk-space)) (match others @@ -613,18 +620,17 @@ If TIMEOUT is #f, simply evaluate EXP..." (#f (report-guile-error name)) ((? string? version) - ;; Note: The version string already contains the word "Guile". - (info (G_ "'~a' is running ~a~%") + (info (G_ "'~a' is running GNU Guile ~a~%") name (node-guile-version node))))) (define (assert-node-has-guix node name) "Bail out if NODE lacks the (guix) module, or if its daemon is not running." (catch 'node-repl-error (lambda () - (match (node-eval node - '(begin - (use-modules (guix)) - (and add-text-to-store 'alright))) + (match (inferior-eval '(begin + (use-modules (guix)) + (and add-text-to-store 'alright)) + node) ('alright #t) (_ (report-module-error name)))) (lambda (key . args) @@ -632,12 +638,12 @@ If TIMEOUT is #f, simply evaluate EXP..." (catch 'node-repl-error (lambda () - (match (node-eval node - '(begin - (use-modules (guix)) - (with-store store - (add-text-to-store store "test" - "Hello, build machine!")))) + (match (inferior-eval '(begin + (use-modules (guix)) + (with-store store + (add-text-to-store store "test" + "Hello, build machine!"))) + node) ((? string? str) (info (G_ "Guix is usable on '~a' (test returned ~s)~%") name str)) @@ -656,25 +662,23 @@ If TIMEOUT is #f, simply evaluate EXP..." (string-append name "-" (number->string (random 1000000 (force %random-state))))) -(define (assert-node-can-import node name daemon-socket) +(define (assert-node-can-import session node name daemon-socket) "Bail out if NODE refuses to import our archives." - (let ((session (node-session node))) - (with-store store - (let* ((item (add-text-to-store store "export-test" (nonce))) - (remote (connect-to-remote-daemon session daemon-socket))) - (with-store local - (send-files local (list item) remote)) - - (if (valid-path? remote item) - (info (G_ "'~a' successfully imported '~a'~%") - name item) - (leave (G_ "'~a' was not properly imported on '~a'~%") - item name)))))) - -(define (assert-node-can-export node name daemon-socket) + (with-store store + (let* ((item (add-text-to-store store "export-test" (nonce))) + (remote (connect-to-remote-daemon session daemon-socket))) + (with-store local + (send-files local (list item) remote)) + + (if (valid-path? remote item) + (info (G_ "'~a' successfully imported '~a'~%") + name item) + (leave (G_ "'~a' was not properly imported on '~a'~%") + item name))))) + +(define (assert-node-can-export session node name daemon-socket) "Bail out if we cannot import signed archives from NODE." - (let* ((session (node-session node)) - (remote (connect-to-remote-daemon session daemon-socket)) + (let* ((remote (connect-to-remote-daemon session daemon-socket)) (item (add-text-to-store remote "import-test" (nonce name)))) (with-store store (if (and (retrieve-files store (list item) remote) @@ -701,11 +705,13 @@ machine." (let* ((names (map build-machine-name machines)) (sockets (map build-machine-daemon-socket machines)) (sessions (map open-ssh-session machines)) - (nodes (map make-node sessions))) + (nodes (map remote-inferior sessions))) (for-each assert-node-repl nodes names) (for-each assert-node-has-guix nodes names) - (for-each assert-node-can-import nodes names sockets) - (for-each assert-node-can-export nodes names sockets)))) + (for-each assert-node-can-import sessions nodes names sockets) + (for-each assert-node-can-export sessions nodes names sockets) + (for-each close-inferior nodes) + (for-each disconnect! sessions)))) (define (check-machine-status machine-file pred) "Print the load of each machine matching PRED in MACHINE-FILE." @@ -722,10 +728,11 @@ machine." (length machines) machine-file) (for-each (lambda (machine) (let* ((session (open-ssh-session machine)) - (node (make-node session)) - (uts (node-eval node '(uname))) - (load (node-load node)) - (free (node-free-disk-space node))) + (inferior (remote-inferior session)) + (uts (inferior-eval '(uname) inferior)) + (load (node-load inferior)) + (free (node-free-disk-space inferior))) + (close-inferior inferior) (disconnect! session) (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%" diff --git a/guix/ssh.scm b/guix/ssh.scm index b8bea8028a..1ed8406633 100644 --- a/guix/ssh.scm +++ b/guix/ssh.scm @@ -27,8 +27,6 @@ #:use-module (ssh channel) #:use-module (ssh popen) #:use-module (ssh session) - #:use-module (ssh dist) - #:use-module (ssh dist node) #:use-module (srfi srfi-1) #:use-module (srfi srfi-11) #:use-module (srfi srfi-26) @@ -102,6 +100,20 @@ Throw an error on failure." "guix" "repl" "-t" "machine"))) (port->inferior pipe))) +(define (inferior-remote-eval exp session) + "Evaluate EXP in a new inferior running in SESSION, and close the inferior +right away." + (let ((inferior (remote-inferior session))) + (dynamic-wind + (const #t) + (lambda () + (inferior-eval exp inferior)) + (lambda () + ;; Close INFERIOR right away to prevent finalization from happening in + ;; another thread at the wrong time (see + ;; .) + (close-inferior inferior))))) + (define* (remote-daemon-channel session #:optional (socket-name @@ -277,15 +289,15 @@ Return the list of store items actually sent." ;; Compute the subset of FILES missing on SESSION and send them. (let* ((files (if recursive? (requisites local files) files)) (session (channel-get-session (nix-server-socket remote))) - (node (make-node session)) - (missing (node-eval node - `(begin - (use-modules (guix) - (srfi srfi-1) (srfi srfi-26)) - - (with-store store - (remove (cut valid-path? store <>) - ',files))))) + (missing (inferior-remote-eval + `(begin + (use-modules (guix) + (srfi srfi-1) (srfi srfi-26)) + + (with-store store + (remove (cut valid-path? store <>) + ',files))) + session)) (count (length missing)) (sizes (map (lambda (item) (path-info-nar-size (query-path-info local item))) -- cgit v1.2.3 From 10b2834f82b7502dc2dc733d39d97f9ff2d07564 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Tue, 25 Dec 2018 17:03:37 +0100 Subject: offload: Adjust 'test' and 'status' to the latest changes. This is a followup to ed7b44370f71126087eb953f36aad8dc4c44109f; following that commit, 'guix offload test' and 'guix offload status' would abort with a backtrace instead of clearly diagnosing a missing 'guix' command on the build machine. * guix/scripts/offload.scm (assert-node-has-guix): Call 'leave' when NODE is not an inferior. Remove 'catch' blocks for 'node-repl-error'. (check-machine-availability): Invoke 'assert-node-has-guix' first. (check-machine-status): Print a warning when 'remote-inferior' returns #f. --- guix/scripts/offload.scm | 90 +++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 44 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index b472d202a9..dcdccc80e0 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -624,35 +624,30 @@ If TIMEOUT is #f, simply evaluate EXP..." name (node-guile-version node))))) (define (assert-node-has-guix node name) - "Bail out if NODE lacks the (guix) module, or if its daemon is not running." - (catch 'node-repl-error - (lambda () - (match (inferior-eval '(begin - (use-modules (guix)) - (and add-text-to-store 'alright)) - node) - ('alright #t) - (_ (report-module-error name)))) - (lambda (key . args) - (report-module-error name))) - - (catch 'node-repl-error - (lambda () - (match (inferior-eval '(begin - (use-modules (guix)) - (with-store store - (add-text-to-store store "test" - "Hello, build machine!"))) - node) - ((? string? str) - (info (G_ "Guix is usable on '~a' (test returned ~s)~%") - name str)) - (x - (leave (G_ "failed to talk to guix-daemon on '~a' (test returned ~s)~%") - name x)))) - (lambda (key . args) - (leave (G_ "remote evaluation on '~a' failed:~{ ~s~}~%") - name args)))) + "Bail out if NODE if #f or if we fail to use the (guix) module, or if its +daemon is not running." + (unless (inferior? node) + (leave (G_ "failed to run 'guix repl' on '~a'~%") name)) + + (match (inferior-eval '(begin + (use-modules (guix)) + (and add-text-to-store 'alright)) + node) + ('alright #t) + (_ (report-module-error name))) + + (match (inferior-eval '(begin + (use-modules (guix)) + (with-store store + (add-text-to-store store "test" + "Hello, build machine!"))) + node) + ((? string? str) + (info (G_ "Guix is usable on '~a' (test returned ~s)~%") + name str)) + (x + (leave (G_ "failed to talk to guix-daemon on '~a' (test returned ~s)~%") + name x)))) (define %random-state (delay @@ -706,8 +701,8 @@ machine." (sockets (map build-machine-daemon-socket machines)) (sessions (map open-ssh-session machines)) (nodes (map remote-inferior sessions))) - (for-each assert-node-repl nodes names) (for-each assert-node-has-guix nodes names) + (for-each assert-node-repl nodes names) (for-each assert-node-can-import sessions nodes names sockets) (for-each assert-node-can-export sessions nodes names sockets) (for-each close-inferior nodes) @@ -727,21 +722,28 @@ machine." (info (G_ "getting status of ~a build machines defined in '~a'...~%") (length machines) machine-file) (for-each (lambda (machine) - (let* ((session (open-ssh-session machine)) - (inferior (remote-inferior session)) - (uts (inferior-eval '(uname) inferior)) - (load (node-load inferior)) - (free (node-free-disk-space inferior))) - (close-inferior inferior) - (disconnect! session) - (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ + (define session + (open-ssh-session machine)) + + (match (remote-inferior session) + (#f + (warning (G_ "failed to run 'guix repl' on machine '~a'~%") + (build-machine-name machine))) + ((? inferior? inferior) + (let ((uts (inferior-eval '(uname) inferior)) + (load (node-load inferior)) + (free (node-free-disk-space inferior))) + (close-inferior inferior) + (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%" - (build-machine-name machine) - (utsname:sysname uts) (utsname:release uts) - (utsname:machine uts) - (utsname:nodename uts) - (normalized-load machine load) - (/ free (expt 2 20) 1.)))) + (build-machine-name machine) + (utsname:sysname uts) (utsname:release uts) + (utsname:machine uts) + (utsname:nodename uts) + (normalized-load machine load) + (/ free (expt 2 20) 1.))))) + + (disconnect! session)) machines))) -- cgit v1.2.3 From 7f4d102c2fff9ff60cd7bc69f5e7eb694274baae Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Wed, 26 Dec 2018 17:30:56 +0100 Subject: offload: Remove the "machine choice" lock. This lock was unnecessary and it led to a contention when many 'guix offload' processes are polling for available machines. * guix/scripts/offload.scm (machine-choice-lock-file): Remove. (choose-build-machine): Remove surrounding 'with-file-lock (machine-lock-file)'. --- guix/scripts/offload.scm | 119 ++++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 63 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index dcdccc80e0..f90f9e92fa 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -453,10 +453,6 @@ of free disk space on '~a'~%") (build-machine-name machine) "." (symbol->string hint) ".lock")) -(define (machine-choice-lock-file) - "Return the name of the file used as a lock when choosing a build machine." - (string-append %state-directory "/offload/machine-choice.lock")) - (define (random-seed) (logxor (getpid) (car (gettimeofday)))) @@ -479,67 +475,64 @@ of free disk space on '~a'~%") slot (which must later be released with 'release-build-slot'), or #f and #f." ;; Proceed like this: - ;; 1. Acquire the global machine-choice lock. - ;; 2. For all MACHINES, attempt to acquire a build slot, and filter out + ;; 1. For all MACHINES, attempt to acquire a build slot, and filter out ;; those machines for which we failed. - ;; 3. Choose the best machine among those that are left. - ;; 4. Release the previously-acquired build slots of the other machines. - ;; 5. Release the global machine-choice lock. - - (with-file-lock (machine-choice-lock-file) - (define machines+slots - (filter-map (lambda (machine) - (let ((slot (acquire-build-slot machine))) - (and slot (list machine slot)))) - (shuffle machines))) - - (define (undecorate pred) - (lambda (a b) - (match a - ((machine1 slot1) - (match b - ((machine2 slot2) - (pred machine1 machine2))))))) - - (define (machine-faster? m1 m2) - ;; Return #t if M1 is faster than M2. - (> (build-machine-speed m1) - (build-machine-speed m2))) - - (let loop ((machines+slots - (sort machines+slots (undecorate machine-faster?)))) - (match machines+slots - (((best slot) others ...) - ;; Return the best machine unless it's already overloaded. - ;; Note: We call 'node-load' only as a last resort because it is - ;; too costly to call it once for every machine. - (let* ((session (false-if-exception (open-ssh-session best))) - (node (and session (remote-inferior session))) - (load (and node (normalized-load best (node-load node)))) - (space (and node (node-free-disk-space node)))) - (when node (close-inferior node)) - (when session (disconnect! session)) - (if (and node (< load 2.) (>= space %minimum-disk-space)) - (match others - (((machines slots) ...) - ;; Release slots from the uninteresting machines. - (for-each release-build-slot slots) - - ;; The caller must keep SLOT to protect it from GC and to - ;; eventually release it. - (values best slot))) - (begin - ;; BEST is unsuitable, so try the next one. - (when (and space (< space %minimum-disk-space)) - (format (current-error-port) - "skipping machine '~a' because it is low \ + ;; 2. Choose the best machine among those that are left. + ;; 3. Release the previously-acquired build slots of the other machines. + + (define machines+slots + (filter-map (lambda (machine) + (let ((slot (acquire-build-slot machine))) + (and slot (list machine slot)))) + (shuffle machines))) + + (define (undecorate pred) + (lambda (a b) + (match a + ((machine1 slot1) + (match b + ((machine2 slot2) + (pred machine1 machine2))))))) + + (define (machine-faster? m1 m2) + ;; Return #t if M1 is faster than M2. + (> (build-machine-speed m1) + (build-machine-speed m2))) + + (let loop ((machines+slots + (sort machines+slots (undecorate machine-faster?)))) + (match machines+slots + (((best slot) others ...) + ;; Return the best machine unless it's already overloaded. + ;; Note: We call 'node-load' only as a last resort because it is + ;; too costly to call it once for every machine. + (let* ((session (false-if-exception (open-ssh-session best))) + (node (and session (remote-inferior session))) + (load (and node (normalized-load best (node-load node)))) + (space (and node (node-free-disk-space node)))) + (when node (close-inferior node)) + (when session (disconnect! session)) + (if (and node (< load 2.) (>= space %minimum-disk-space)) + (match others + (((machines slots) ...) + ;; Release slots from the uninteresting machines. + (for-each release-build-slot slots) + + ;; The caller must keep SLOT to protect it from GC and to + ;; eventually release it. + (values best slot))) + (begin + ;; BEST is unsuitable, so try the next one. + (when (and space (< space %minimum-disk-space)) + (format (current-error-port) + "skipping machine '~a' because it is low \ on disk space (~,2f MiB free)~%" - (build-machine-name best) - (/ space (expt 2 20) 1.))) - (release-build-slot slot) - (loop others))))) - (() - (values #f #f)))))) + (build-machine-name best) + (/ space (expt 2 20) 1.))) + (release-build-slot slot) + (loop others))))) + (() + (values #f #f))))) (define (call-with-timeout timeout drv thunk) "Call THUNK and leave after TIMEOUT seconds. If TIMEOUT is #f, simply call -- cgit v1.2.3 From 0ef595b99689a4d80521abd87fa893695c7f75df Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Wed, 26 Dec 2018 17:42:02 +0100 Subject: offload: Remove unnecessary locking on machine slots. This extra level of locking turned out to be unnecessary. * guix/scripts/offload.scm (with-machine-lock): Remove. (machine-lock-file): Remove. (acquire-build-slot): Remove surrounding 'with-machine-lock'. --- guix/scripts/offload.scm | 50 ++++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 31 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index f90f9e92fa..30fe69ad6d 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -260,13 +260,6 @@ instead of '~a' of type '~a'~%") (lambda () (unlock-file port))))) -(define-syntax-rule (with-machine-lock machine hint exp ...) - "Wait to acquire MACHINE's exclusive lock for HINT, and evaluate EXP in that -context." - (with-file-lock (machine-lock-file machine hint) - exp ...)) - - (define (machine-slot-file machine slot) "Return the file name of MACHINE's file for SLOT." ;; For each machine we have a bunch of files representing each build slot. @@ -284,23 +277,25 @@ the slot, or #f if none is available. This mechanism allows us to set a hard limit on the number of simultaneous connections allowed to MACHINE." (mkdir-p (dirname (machine-slot-file machine 0))) - (with-machine-lock machine 'slots - (any (lambda (slot) - (let ((port (open-file (machine-slot-file machine slot) - "w0"))) - (catch 'flock-error - (lambda () - (fcntl-flock port 'write-lock #:wait? #f) - ;; Got it! - (format (current-error-port) - "process ~a acquired build slot '~a'~%" - (getpid) (port-filename port)) - port) - (lambda args - ;; PORT is already locked by another process. - (close-port port) - #f)))) - (iota (build-machine-parallel-builds machine))))) + + ;; When several 'guix offload' processes run in parallel, there's a race + ;; among them, but since they try the slots in the same order, we're fine. + (any (lambda (slot) + (let ((port (open-file (machine-slot-file machine slot) + "w0"))) + (catch 'flock-error + (lambda () + (fcntl-flock port 'write-lock #:wait? #f) + ;; Got it! + (format (current-error-port) + "process ~a acquired build slot '~a'~%" + (getpid) (port-filename port)) + port) + (lambda args + ;; PORT is already locked by another process. + (close-port port) + #f)))) + (iota (build-machine-parallel-builds machine)))) (define (release-build-slot slot) "Release SLOT, a build slot as returned as by 'acquire-build-slot'." @@ -447,12 +442,6 @@ of free disk space on '~a'~%") normalized) load)) -(define (machine-lock-file machine hint) - "Return the name of MACHINE's lock file for HINT." - (string-append %state-directory "/offload/" - (build-machine-name machine) - "." (symbol->string hint) ".lock")) - (define (random-seed) (logxor (getpid) (car (gettimeofday)))) @@ -827,7 +816,6 @@ This tool is meant to be used internally by 'guix-daemon'.\n")) (leave (G_ "invalid arguments: ~{~s ~}~%") x)))) ;;; Local Variables: -;;; eval: (put 'with-machine-lock 'scheme-indent-function 2) ;;; eval: (put 'with-file-lock 'scheme-indent-function 1) ;;; eval: (put 'with-error-to-port 'scheme-indent-function 1) ;;; eval: (put 'with-timeout 'scheme-indent-function 2) -- cgit v1.2.3 From f9e8a12379c6fefc9e5c3c7fc3926599bbefc013 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Mon, 21 Jan 2019 17:41:11 +0100 Subject: store: Rename '&nix-error' to '&store-error'. * guix/store.scm (&nix-error): Rename to... (&store-error): ... this, and adjust users. (&nix-connection-error): Rename to... (&store-connection-error): ... this, and adjust users. (&nix-protocol-error): Rename to... (&store-protocol-error): ... this, adjust users. (&nix-error, &nix-connection-error, &nix-protocol-error): Define these condition types and their getters as deprecrated aliases. * build-aux/run-system-tests.scm, guix/derivations.scm, guix/grafts.scm, guix/scripts/challenge.scm, guix/scripts/graph.scm, guix/scripts/lint.scm, guix/scripts/offload.scm, guix/serialization.scm, guix/ssh.scm, guix/tests.scm, guix/ui.scm, tests/derivations.scm, tests/gexp.scm, tests/guix-daemon.sh, tests/packages.scm, tests/store.scm, doc/guix.texi: Adjust to use the new names. --- build-aux/run-system-tests.scm | 2 +- doc/guix.texi | 2 +- guix/derivations.scm | 2 +- guix/grafts.scm | 2 +- guix/scripts/challenge.scm | 2 +- guix/scripts/graph.scm | 2 +- guix/scripts/lint.scm | 4 +-- guix/scripts/offload.scm | 6 ++-- guix/serialization.scm | 2 +- guix/ssh.scm | 6 ++-- guix/store.scm | 77 ++++++++++++++++++++++++++++-------------- guix/tests.scm | 2 +- guix/ui.scm | 10 +++--- tests/derivations.scm | 42 +++++++++++------------ tests/gexp.scm | 4 +-- tests/guix-daemon.sh | 8 ++--- tests/packages.scm | 2 +- tests/store.scm | 46 ++++++++++++------------- 18 files changed, 123 insertions(+), 98 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/build-aux/run-system-tests.scm b/build-aux/run-system-tests.scm index bcd7547704..fd1f6653af 100644 --- a/build-aux/run-system-tests.scm +++ b/build-aux/run-system-tests.scm @@ -30,7 +30,7 @@ (define (built-derivations* drv) (lambda (store) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (values #f store))) (values (build-derivations store drv) store)))) diff --git a/doc/guix.texi b/doc/guix.texi index 245a18bc70..e70fed2f1c 100644 --- a/doc/guix.texi +++ b/doc/guix.texi @@ -5027,7 +5027,7 @@ Return @code{#t} when @var{path} designates a valid store item and invalid, for instance because it is the result of an aborted or failed build.) -A @code{&nix-protocol-error} condition is raised if @var{path} is not +A @code{&store-protocol-error} condition is raised if @var{path} is not prefixed by the store directory (@file{/gnu/store}). @end deffn diff --git a/guix/derivations.scm b/guix/derivations.scm index f6176a78fd..fb2fa177be 100644 --- a/guix/derivations.scm +++ b/guix/derivations.scm @@ -113,7 +113,7 @@ ;;; Error conditions. ;;; -(define-condition-type &derivation-error &nix-error +(define-condition-type &derivation-error &store-error derivation-error? (derivation derivation-error-derivation)) diff --git a/guix/grafts.scm b/guix/grafts.scm index db9c6854fd..a3e12f6efd 100644 --- a/guix/grafts.scm +++ b/guix/grafts.scm @@ -189,7 +189,7 @@ available." items))) (define (references* items) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; As a last resort, build DRV and query the references of the ;; build result. diff --git a/guix/scripts/challenge.scm b/guix/scripts/challenge.scm index f0693ed8df..65de42053d 100644 --- a/guix/scripts/challenge.scm +++ b/guix/scripts/challenge.scm @@ -109,7 +109,7 @@ "Return the hash of ITEM, a store item, if ITEM was built locally. Otherwise return #f." (lambda (store) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (values #f store))) (if (locally-built? store item) (values (query-path-hash store item) store) diff --git a/guix/scripts/graph.scm b/guix/scripts/graph.scm index 145a574dba..8efeef3274 100644 --- a/guix/scripts/graph.scm +++ b/guix/scripts/graph.scm @@ -299,7 +299,7 @@ this type of graph"))))))) information available in the local store or using information about substitutes." (lambda (store) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (match (substitutable-path-info store (list item)) ((info) (values (substitutable-references info) store)) diff --git a/guix/scripts/lint.scm b/guix/scripts/lint.scm index 665adcfb8d..ddad5b7fd0 100644 --- a/guix/scripts/lint.scm +++ b/guix/scripts/lint.scm @@ -833,11 +833,11 @@ descriptions maintained upstream." (define (try system) (catch #t (lambda () - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (emit-warning package (format #f (G_ "failed to create ~a derivation: ~a") system - (nix-protocol-error-message c)))) + (store-protocol-error-message c)))) ((message-condition? c) (emit-warning package (format #f (G_ "failed to create ~a derivation: ~a") diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index 30fe69ad6d..2116b38425 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2014, 2015, 2016, 2017, 2018 Ludovic Courtès +;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019 Ludovic Courtès ;;; Copyright © 2017 Ricardo Wurmus ;;; ;;; This file is part of GNU Guix. @@ -358,12 +358,12 @@ MACHINE." (format (current-error-port) "@ build-remote ~a ~a~%" (derivation-file-name drv) (build-machine-name machine)) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (format (current-error-port) (G_ "derivation '~a' offloaded to '~a' failed: ~a~%") (derivation-file-name drv) (build-machine-name machine) - (nix-protocol-error-message c)) + (store-protocol-error-message c)) (let* ((inferior (false-if-exception (remote-inferior session))) (space (false-if-exception (node-free-disk-space inferior)))) diff --git a/guix/serialization.scm b/guix/serialization.scm index 7c0fea552d..e14b7d1b9f 100644 --- a/guix/serialization.scm +++ b/guix/serialization.scm @@ -59,7 +59,7 @@ ;; Similar to serialize.cc in Nix. -(define-condition-type &nar-error &error ; XXX: inherit from &nix-error ? +(define-condition-type &nar-error &error ; XXX: inherit from &store-error ? nar-error? (file nar-error-file) ; file we were restoring, or #f (port nar-error-port)) ; port from which we read diff --git a/guix/ssh.scm b/guix/ssh.scm index 77329618d5..2b286a67b2 100644 --- a/guix/ssh.scm +++ b/guix/ssh.scm @@ -328,17 +328,17 @@ Return the list of store items actually sent." missing) (('protocol-error message) (raise (condition - (&nix-protocol-error (message message) (status 42))))) + (&store-protocol-error (message message) (status 42))))) (('error key args ...) (raise (condition - (&nix-protocol-error + (&store-protocol-error (message (call-with-output-string (lambda (port) (print-exception port #f key args)))) (status 43))))) (_ (raise (condition - (&nix-protocol-error + (&store-protocol-error (message "unknown error while sending files over SSH") (status 44))))))))) diff --git a/guix/store.scm b/guix/store.scm index f8c79788b8..d079147529 100644 --- a/guix/store.scm +++ b/guix/store.scm @@ -68,6 +68,15 @@ current-store-protocol-version ;for internal use mcached + &store-error store-error? + &store-connection-error store-connection-error? + store-connection-error-file + store-connection-error-code + &store-protocol-error store-protocol-error? + store-protocol-error-message + store-protocol-error-status + + ;; Deprecated forms for '&store-error' et al. &nix-error nix-error? &nix-connection-error nix-connection-error? nix-connection-error-file @@ -377,34 +386,50 @@ (define-deprecated/alias nix-server-socket store-connection-socket) -(define-condition-type &nix-error &error - nix-error?) +(define-condition-type &store-error &error + store-error?) -(define-condition-type &nix-connection-error &nix-error - nix-connection-error? - (file nix-connection-error-file) - (errno nix-connection-error-code)) +(define-condition-type &store-connection-error &store-error + store-connection-error? + (file store-connection-error-file) + (errno store-connection-error-code)) + +(define-condition-type &store-protocol-error &store-error + store-protocol-error? + (message store-protocol-error-message) + (status store-protocol-error-status)) + +(define-deprecated/alias &nix-error &store-error) +(define-deprecated/alias nix-error? store-error?) +(define-deprecated/alias &nix-connection-error &store-connection-error) +(define-deprecated/alias nix-connection-error? store-connection-error?) +(define-deprecated/alias nix-connection-error-file + store-connection-error-file) +(define-deprecated/alias nix-connection-error-code + store-connection-error-code) +(define-deprecated/alias &nix-protocol-error &store-protocol-error) +(define-deprecated/alias nix-protocol-error? store-protocol-error?) +(define-deprecated/alias nix-protocol-error-message + store-protocol-error-message) +(define-deprecated/alias nix-protocol-error-status + store-protocol-error-status) -(define-condition-type &nix-protocol-error &nix-error - nix-protocol-error? - (message nix-protocol-error-message) - (status nix-protocol-error-status)) (define-syntax-rule (system-error-to-connection-error file exp ...) "Catch 'system-error' exceptions and translate them to -'&nix-connection-error'." +'&store-connection-error'." (catch 'system-error (lambda () exp ...) (lambda args (let ((errno (system-error-errno args))) - (raise (condition (&nix-connection-error + (raise (condition (&store-connection-error (file file) (errno errno)))))))) (define (open-unix-domain-socket file) "Connect to the Unix-domain socket at FILE and return it. Raise a -'&nix-connection-error' upon error." +'&store-connection-error' upon error." (let ((s (with-fluids ((%default-port-encoding #f)) ;; This trick allows use of the `scm_c_read' optimization. (socket PF_UNIX SOCK_STREAM 0))) @@ -420,7 +445,7 @@ (define (open-inet-socket host port) "Connect to the Unix-domain socket at HOST:PORT and return it. Raise a -'&nix-connection-error' upon error." +'&store-connection-error' upon error." (let ((sock (with-fluids ((%default-port-encoding #f)) ;; This trick allows use of the `scm_c_read' optimization. (socket PF_UNIX SOCK_STREAM 0)))) @@ -452,7 +477,7 @@ ;; Connection failed, so try one of the other addresses. (close s) (if (null? rest) - (raise (condition (&nix-connection-error + (raise (condition (&store-connection-error (file host) (errno (system-error-errno args))))) (loop rest)))))))))) @@ -461,7 +486,7 @@ "Connect to the daemon at URI, a string that may be an actual URI or a file name." (define (not-supported) - (raise (condition (&nix-connection-error + (raise (condition (&store-connection-error (file uri) (errno ENOTSUP))))) @@ -510,8 +535,8 @@ for this connection will be pinned. Return a server object." ;; One of the 'write-' or 'read-' calls below failed, but this is ;; really a connection error. (raise (condition - (&nix-connection-error (file (or port uri)) - (errno EPROTO)) + (&store-connection-error (file (or port uri)) + (errno EPROTO)) (&message (message "build daemon handshake failed")))))) (let*-values (((port) (or port (connect-to-daemon uri))) @@ -689,14 +714,14 @@ encoding conversion errors." (not (eof-object? (lookahead-u8 p)))) (read-int p) 1))) - (raise (condition (&nix-protocol-error + (raise (condition (&store-protocol-error (message error) (status status)))))) ((= k %stderr-last) ;; The daemon is done (see `stopWork' in `nix-worker.cc'.) #t) (else - (raise (condition (&nix-protocol-error + (raise (condition (&store-protocol-error (message "invalid error code") (status k)))))))) @@ -926,7 +951,7 @@ bytevector) as its internal buffer, and a thunk to flush this output port." invalid item may exist on disk but still be invalid, for instance because it is the result of an aborted or failed build.) -A '&nix-protocol-error' condition is raised if PATH is not prefixed by the +A '&store-protocol-error' condition is raised if PATH is not prefixed by the store directory (/gnu/store)." boolean) @@ -1141,7 +1166,7 @@ Return #t on success." (build store things mode) (if (= mode (build-mode normal)) (build/old store things) - (raise (condition (&nix-protocol-error + (raise (condition (&store-protocol-error (message "unsupported build mode") (status 1)))))))))) @@ -1201,12 +1226,12 @@ error if there is no such root." (define (references/substitutes store items) "Return the list of list of references of ITEMS; the result has the same length as ITEMS. Query substitute information for any item missing from the -store at once. Raise a '&nix-protocol-error' exception if reference +store at once. Raise a '&store-protocol-error' exception if reference information for one of ITEMS is missing." (let* ((requested items) (local-refs (map (lambda (item) (or (hash-ref %reference-cache item) - (guard (c ((nix-protocol-error? c) #f)) + (guard (c ((store-protocol-error? c) #f)) (references store item)))) items)) (missing (fold-right (lambda (item local-ref result) @@ -1222,7 +1247,7 @@ information for one of ITEMS is missing." '() (substitutable-path-info store missing)))) (when (< (length substs) (length missing)) - (raise (condition (&nix-protocol-error + (raise (condition (&store-protocol-error (message "cannot determine \ the list of references") (status 1))))) @@ -1673,7 +1698,7 @@ where FILE is the entry's absolute file name and STAT is the result of "Monadic version of 'query-path-info' that returns #f when ITEM is not in the store." (lambda (store) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; ITEM is not in the store; return #f. (values #f store))) (values (query-path-info store item) store)))) diff --git a/guix/tests.scm b/guix/tests.scm index f4948148c4..16a426c4f9 100644 --- a/guix/tests.scm +++ b/guix/tests.scm @@ -64,7 +64,7 @@ (define* (open-connection-for-tests #:optional (uri (%daemon-socket-uri))) "Open a connection to the build daemon for tests purposes and return it." - (guard (c ((nix-error? c) + (guard (c ((store-error? c) (format (current-error-port) "warning: build daemon error: ~s~%" c) #f)) diff --git a/guix/ui.scm b/guix/ui.scm index 1e089753e1..9ff56ea85c 100644 --- a/guix/ui.scm +++ b/guix/ui.scm @@ -684,14 +684,14 @@ or remove one of them from the profile.") file (or (port-filename* port) port)) (leave (G_ "corrupt input while restoring archive from ~s~%") (or (port-filename* port) port))))) - ((nix-connection-error? c) + ((store-connection-error? c) (leave (G_ "failed to connect to `~a': ~a~%") - (nix-connection-error-file c) - (strerror (nix-connection-error-code c)))) - ((nix-protocol-error? c) + (store-connection-error-file c) + (strerror (store-connection-error-code c)))) + ((store-protocol-error? c) ;; FIXME: Server-provided error messages aren't i18n'd. (leave (G_ "build failed: ~a~%") - (nix-protocol-error-message c))) + (store-protocol-error-message c))) ((derivation-missing-output-error? c) (leave (G_ "reference to invalid output '~a' of derivation '~a'~%") (derivation-missing-output c) diff --git a/tests/derivations.scm b/tests/derivations.scm index 5f294c1827..c0601c0e88 100644 --- a/tests/derivations.scm +++ b/tests/derivations.scm @@ -1,5 +1,5 @@ ;;; GNU Guix --- Functional package management for GNU -;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018 Ludovic Courtès +;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Ludovic Courtès ;;; ;;; This file is part of GNU Guix. ;;; @@ -185,9 +185,9 @@ (set-build-options %store #:use-substitutes? #f #:keep-going? #t) - (guard (c ((nix-protocol-error? c) - (and (= 100 (nix-protocol-error-status c)) - (string-contains (nix-protocol-error-message c) + (guard (c ((store-protocol-error? c) + (and (= 100 (store-protocol-error-status c)) + (string-contains (store-protocol-error-message c) (derivation-file-name d1)) (not (valid-path? %store (derivation->output-path d1))) (valid-path? %store (derivation->output-path d2))))) @@ -222,8 +222,8 @@ (test-assert "unknown built-in builder" (let ((drv (derivation %store "ohoh" "builtin:does-not-exist" '()))) - (guard (c ((nix-protocol-error? c) - (string-contains (nix-protocol-error-message c) "failed"))) + (guard (c ((store-protocol-error? c) + (string-contains (store-protocol-error-message c) "failed"))) (build-derivations %store (list drv)) #f))) @@ -253,8 +253,8 @@ . ,(object->string (%local-url)))) #:hash-algo 'sha256 #:hash (sha256 (random-bytevector 100))))) ;wrong - (guard (c ((nix-protocol-error? c) - (string-contains (nix-protocol-error-message c) "failed"))) + (guard (c ((store-protocol-error? c) + (string-contains (store-protocol-error-message c) "failed"))) (build-derivations %store (list drv)) #f)))) @@ -268,8 +268,8 @@ . ,(object->string (%local-url)))) #:hash-algo 'sha256 #:hash (sha256 (random-bytevector 100))))) - (guard (c ((nix-protocol-error? c) - (string-contains (nix-protocol-error-message (pk c)) "failed"))) + (guard (c ((store-protocol-error? c) + (string-contains (store-protocol-error-message (pk c)) "failed"))) (build-derivations %store (list drv)) #f)))) @@ -279,8 +279,8 @@ (drv (derivation %store "world" "builtin:download" '() #:env-vars `(("url" . ,(object->string url)))))) - (guard (c ((nix-protocol-error? c) - (string-contains (nix-protocol-error-message c) "failed"))) + (guard (c ((store-protocol-error? c) + (string-contains (store-protocol-error-message c) "failed"))) (build-derivations %store (list drv)) #f))) @@ -607,7 +607,7 @@ `("-c" ,(string-append "echo " txt "> $out")) #:inputs `((,%bash) (,txt)) #:allowed-references '()))) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; There's no specific error message to check for. #t)) (build-derivations %store (list drv)) @@ -625,7 +625,7 @@ `("-c" ,"echo $out > $out") #:inputs `((,%bash)) #:allowed-references '()))) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; There's no specific error message to check for. #t)) (build-derivations %store (list drv)) @@ -644,7 +644,7 @@ `("-c" ,(string-append "echo " txt "> $out")) #:inputs `((,%bash) (,txt)) #:disallowed-references (list txt)))) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; There's no specific error message to check for. #t)) (build-derivations %store (list drv)) @@ -765,8 +765,8 @@ (builder '(begin (sleep 100) (mkdir %output) #t)) (drv (build-expression->derivation store "silent" builder)) (out-path (derivation->output-path drv))) - (guard (c ((nix-protocol-error? c) - (and (string-contains (nix-protocol-error-message c) + (guard (c ((store-protocol-error? c) + (and (string-contains (store-protocol-error-message c) "failed") (not (valid-path? store out-path))))) (build-derivations store (list drv)) @@ -779,8 +779,8 @@ (builder '(begin (sleep 100) (mkdir %output) #t)) (drv (build-expression->derivation store "slow" builder)) (out-path (derivation->output-path drv))) - (guard (c ((nix-protocol-error? c) - (and (string-contains (nix-protocol-error-message c) + (guard (c ((store-protocol-error? c) + (and (string-contains (store-protocol-error-message c) "failed") (not (valid-path? store out-path))))) (build-derivations store (list drv)) @@ -942,11 +942,11 @@ #f)) ; fail! (drv (build-expression->derivation %store "fail" builder)) (out-path (derivation->output-path drv))) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; Note that the output path may exist at this point, but it ;; is invalid. (and (string-match "build .* failed" - (nix-protocol-error-message c)) + (store-protocol-error-message c)) (not (valid-path? %store out-path))))) (build-derivations %store (list drv)) #f))) diff --git a/tests/gexp.scm b/tests/gexp.scm index c4b437cd49..cee2c96610 100644 --- a/tests/gexp.scm +++ b/tests/gexp.scm @@ -919,7 +919,7 @@ (chdir #$output) (symlink #$%bootstrap-guile "guile")) #:allowed-references '())))) - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) (build-derivations %store (list drv)) #f))) @@ -943,7 +943,7 @@ (chdir #$output) (symlink #$%bootstrap-guile "guile")) #:disallowed-references (list %bootstrap-guile))))) - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) (build-derivations %store (list drv)) #f))) diff --git a/tests/guix-daemon.sh b/tests/guix-daemon.sh index 9ae6e0b77a..4c19a55722 100644 --- a/tests/guix-daemon.sh +++ b/tests/guix-daemon.sh @@ -1,5 +1,5 @@ # GNU Guix --- Functional package management for GNU -# Copyright © 2012, 2014, 2015, 2016, 2017, 2018 Ludovic Courtès +# Copyright © 2012, 2014, 2015, 2016, 2017, 2018, 2019 Ludovic Courtès # # This file is part of GNU Guix. # @@ -109,7 +109,7 @@ guile -c " (define (build-without-failing drv) (lambda (store) - (guard (c ((nix-protocol-error? c) (values #t store))) + (guard (c ((store-protocol-error? c) (values #t store))) (build-derivations store (list drv)) (values #f store)))) @@ -177,9 +177,9 @@ client_code=' `("-e" ,build) #:inputs `((,bash) (,build)) #:env-vars `(("x" . ,(random-text)))))) - (exit (guard (c ((nix-protocol-error? c) + (exit (guard (c ((store-protocol-error? c) (->bool - (string-contains (pk (nix-protocol-error-message c)) + (string-contains (pk (store-protocol-error-message c)) "failed")))) (build-derivations store (list drv)) #f))))' diff --git a/tests/packages.scm b/tests/packages.scm index ed635d9011..29e5e4103c 100644 --- a/tests/packages.scm +++ b/tests/packages.scm @@ -570,7 +570,7 @@ (symlink %output (string-append %output "/self")) #t))))) (d (package-derivation %store p))) - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) (build-derivations %store (list d)) #f))) diff --git a/tests/store.scm b/tests/store.scm index 5ff9308d7d..e28c0c5aaa 100644 --- a/tests/store.scm +++ b/tests/store.scm @@ -63,9 +63,9 @@ (test-equal "connection handshake error" EPROTO (let ((port (%make-void-port "rw"))) - (guard (c ((nix-connection-error? c) - (and (eq? port (nix-connection-error-file c)) - (nix-connection-error-code c)))) + (guard (c ((store-connection-error? c) + (and (eq? port (store-connection-error-file c)) + (store-connection-error-code c)))) (open-connection #f #:port port) 'broken))) @@ -120,7 +120,7 @@ (test-assert "valid-path? error" (with-store s - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) (valid-path? s "foo") #f))) @@ -133,7 +133,7 @@ (with-store s (let-syntax ((true-if-error (syntax-rules () ((_ exp) - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) exp #f))))) (and (true-if-error (valid-path? s "foo")) (true-if-error (valid-path? s "bar")) @@ -274,7 +274,7 @@ (test-assert "references/substitutes missing reference info" (with-store s (set-build-options s #:use-substitutes? #f) - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) (let* ((b (add-to-store s "bash" #t "sha256" (search-bootstrap-binary "bash" (%current-system)))) @@ -422,7 +422,7 @@ %store "foo" `(display ,s) #:guile-for-build (package-derivation s %bootstrap-guile (%current-system))))) - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) (build-derivations %store (list d)))))))) "Here’s a Greek letter: λ.")) @@ -442,7 +442,7 @@ (display "lambda: λ\n")) #:guile-for-build (package-derivation %store %bootstrap-guile)))) - (guard (c ((nix-protocol-error? c) #t)) + (guard (c ((store-protocol-error? c) #t)) (build-derivations %store (list d)))))))) "garbage: �lambda: λ")) @@ -620,12 +620,12 @@ #:fallback? #f #:substitute-urls (%test-substitute-urls)) (and (has-substitutes? s o) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; XXX: the daemon writes "hash mismatch in downloaded ;; path", but the actual error returned to the client ;; doesn't mention that. (pk 'corrupt c) - (not (zero? (nix-protocol-error-status c))))) + (not (zero? (store-protocol-error-status c))))) (build-derivations s (list d)) #f)))))) @@ -646,7 +646,7 @@ (set-build-options s #:use-substitutes? #t #:substitute-urls (%test-substitute-urls)) (and (has-substitutes? s o) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) ;; The substituter failed as expected. Now make ;; sure that #:fallback? #t works correctly. (set-build-options s @@ -712,9 +712,9 @@ (dump (call-with-bytevector-output-port (cute export-paths %store (list file2) <>)))) (delete-paths %store (list file0 file1 file2)) - (guard (c ((nix-protocol-error? c) - (and (not (zero? (nix-protocol-error-status c))) - (string-contains (nix-protocol-error-message c) + (guard (c ((store-protocol-error? c) + (and (not (zero? (store-protocol-error-status c))) + (string-contains (store-protocol-error-message c) "not valid")))) ;; Here we get an exception because DUMP does not include FILE0 and ;; FILE1, which are dependencies of FILE2. @@ -816,10 +816,10 @@ (bytevector-u8-set! dump index (logxor #xff byte))) (and (not (file-exists? file)) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (pk 'c c) - (and (not (zero? (nix-protocol-error-status c))) - (string-contains (nix-protocol-error-message c) + (and (not (zero? (store-protocol-error-status c))) + (string-contains (store-protocol-error-message c) "corrupt")))) (let* ((source (open-bytevector-input-port dump)) (imported (import-paths %store source))) @@ -906,10 +906,10 @@ (begin (write (random-text) entropy-port) (force-output entropy-port) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (pk 'determinism-exception c) - (and (not (zero? (nix-protocol-error-status c))) - (string-contains (nix-protocol-error-message c) + (and (not (zero? (store-protocol-error-status c))) + (string-contains (store-protocol-error-message c) "deterministic")))) ;; This one will produce a different result. Since we're in ;; 'check' mode, this must fail. @@ -945,10 +945,10 @@ #:guile-for-build (package-derivation store %bootstrap-guile (%current-system)))) (file (derivation->output-path drv))) - (guard (c ((nix-protocol-error? c) + (guard (c ((store-protocol-error? c) (pk 'multiple-build c) - (and (not (zero? (nix-protocol-error-status c))) - (string-contains (nix-protocol-error-message c) + (and (not (zero? (store-protocol-error-status c))) + (string-contains (store-protocol-error-message c) "deterministic")))) ;; This one will produce a different result on the second run. (current-build-output-port (current-error-port)) -- cgit v1.2.3 From 02ec889e6b8f6593dd90afcb4d60a43ea67be4b8 Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Tue, 22 Jan 2019 17:37:59 +0100 Subject: offload: 'status' reports the time difference. * guix/scripts/offload.scm (check-machine-status): Report the time difference for each MACHINE. --- guix/scripts/offload.scm | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'guix/scripts/offload.scm') diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index 2116b38425..eb02672dbf 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -712,18 +712,31 @@ machine." (warning (G_ "failed to run 'guix repl' on machine '~a'~%") (build-machine-name machine))) ((? inferior? inferior) - (let ((uts (inferior-eval '(uname) inferior)) - (load (node-load inferior)) - (free (node-free-disk-space inferior))) - (close-inferior inferior) - (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ - host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%" - (build-machine-name machine) - (utsname:sysname uts) (utsname:release uts) - (utsname:machine uts) - (utsname:nodename uts) - (normalized-load machine load) - (/ free (expt 2 20) 1.))))) + (let ((now (car (gettimeofday)))) + (match (inferior-eval '(list (uname) + (car (gettimeofday))) + inferior) + ((uts time) + (when (< time now) + ;; Build machine clocks must not be behind as this + ;; could cause timestamp issues. + (warning (G_ "machine '~a' is ~a seconds behind~%") + (build-machine-name machine) + (- now time))) + + (let ((load (node-load inferior)) + (free (node-free-disk-space inferior))) + (close-inferior inferior) + (format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\ + host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%\ + time difference: ~a s~%" + (build-machine-name machine) + (utsname:sysname uts) (utsname:release uts) + (utsname:machine uts) + (utsname:nodename uts) + (normalized-load machine load) + (/ free (expt 2 20) 1.) + (- time now)))))))) (disconnect! session)) machines))) -- cgit v1.2.3