diff options
Diffstat (limited to 'guix/store')
-rw-r--r-- | guix/store/database.scm | 234 | ||||
-rw-r--r-- | guix/store/deduplication.scm | 148 |
2 files changed, 382 insertions, 0 deletions
diff --git a/guix/store/database.scm b/guix/store/database.scm new file mode 100644 index 0000000000..3623c0e7a0 --- /dev/null +++ b/guix/store/database.scm @@ -0,0 +1,234 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org> +;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org> +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. + +(define-module (guix store database) + #:use-module (sqlite3) + #:use-module (guix config) + #:use-module (guix serialization) + #:use-module (guix store deduplication) + #:use-module (guix base16) + #:use-module (guix build syscalls) + #:use-module (srfi srfi-11) + #:use-module (srfi srfi-19) + #:use-module (ice-9 match) + #:export (sqlite-register + register-path + reset-timestamps)) + +;;; Code for working with the store database directly. + + +(define-syntax-rule (with-database file db exp ...) + "Open DB from FILE and close it when the dynamic extent of EXP... is left." + (let ((db (sqlite-open file))) + (dynamic-wind noop + (lambda () + exp ...) + (lambda () + (sqlite-close db))))) + +(define (last-insert-row-id db) + ;; XXX: (sqlite3) currently lacks bindings for 'sqlite3_last_insert_rowid'. + ;; Work around that. + (let* ((stmt (sqlite-prepare db "SELECT last_insert_rowid();" + #:cache? #t)) + (result (sqlite-fold cons '() stmt))) + (sqlite-finalize stmt) + (match result + ((#(id)) id) + (_ #f)))) + +(define path-id-sql + "SELECT id FROM ValidPaths WHERE path = :path") + +(define* (path-id db path) + "If PATH exists in the 'ValidPaths' table, return its numerical +identifier. Otherwise, return #f." + (let ((stmt (sqlite-prepare db path-id-sql #:cache? #t))) + (sqlite-bind-arguments stmt #:path path) + (let ((result (sqlite-fold cons '() stmt))) + (sqlite-finalize stmt) + (match result + ((#(id) . _) id) + (_ #f))))) + +(define update-sql + "UPDATE ValidPaths SET hash = :hash, registrationTime = :time, deriver = +:deriver, narSize = :size WHERE id = :id") + +(define insert-sql + "INSERT INTO ValidPaths (path, hash, registrationTime, deriver, narSize) +VALUES (:path, :hash, :time, :deriver, :size)") + +(define* (update-or-insert db #:key path deriver hash nar-size time) + "The classic update-if-exists and insert-if-doesn't feature that sqlite +doesn't exactly have... they've got something close, but it involves deleting +and re-inserting instead of updating, which causes problems with foreign keys, +of course. Returns the row id of the row that was modified or inserted." + (let ((id (path-id db path))) + (if id + (let ((stmt (sqlite-prepare db update-sql #:cache? #t))) + (sqlite-bind-arguments stmt #:id id + #:path path #:deriver deriver + #:hash hash #:size nar-size #:time time) + (sqlite-fold cons '() stmt) + (sqlite-finalize stmt) + (last-insert-row-id db)) + (let ((stmt (sqlite-prepare db insert-sql #:cache? #t))) + (sqlite-bind-arguments stmt + #:path path #:deriver deriver + #:hash hash #:size nar-size #:time time) + (sqlite-fold cons '() stmt) ;execute it + (sqlite-finalize stmt) + (last-insert-row-id db))))) + +(define add-reference-sql + "INSERT OR IGNORE INTO Refs (referrer, reference) SELECT :referrer, id +FROM ValidPaths WHERE path = :reference") + +(define (add-references db referrer references) + "REFERRER is the id of the referring store item, REFERENCES is a list +containing store items being referred to. Note that all of the store items in +REFERENCES must already be registered." + (let ((stmt (sqlite-prepare db add-reference-sql #:cache? #t))) + (for-each (lambda (reference) + (sqlite-reset stmt) + (sqlite-bind-arguments stmt #:referrer referrer + #:reference reference) + (sqlite-fold cons '() stmt) ;execute it + (sqlite-finalize stmt) + (last-insert-row-id db)) + references))) + +;; XXX figure out caching of statement and database objects... later +(define* (sqlite-register #:key db-file path (references '()) + deriver hash nar-size) + "Registers this stuff in a database specified by DB-FILE. PATH is the string +path of some store item, REFERENCES is a list of string paths which the store +item PATH refers to (they need to be already registered!), DERIVER is a string +path of the derivation that created the store item PATH, HASH is the +base16-encoded sha256 hash of the store item denoted by PATH (prefixed with +\"sha256:\") after being converted to nar form, and nar-size is the size in +bytes of the store item denoted by PATH after being converted to nar form." + (with-database db-file db + (let ((id (update-or-insert db #:path path + #:deriver deriver + #:hash hash + #:nar-size nar-size + #:time (time-second (current-time time-utc))))) + (add-references db id references)))) + + +;;; +;;; High-level interface. +;;; + +;; TODO: Factorize with that in (gnu build install). +(define (reset-timestamps file) + "Reset the modification time on FILE and on all the files it contains, if +it's a directory." + (let loop ((file file) + (type (stat:type (lstat file)))) + (case type + ((directory) + (utime file 0 0 0 0) + (let ((parent file)) + (for-each (match-lambda + (("." . _) #f) + ((".." . _) #f) + ((file . properties) + (let ((file (string-append parent "/" file))) + (loop file + (match (assoc-ref properties 'type) + ((or 'unknown #f) + (stat:type (lstat file))) + (type type)))))) + (scandir* parent)))) + ((symlink) + ;; FIXME: Implement bindings for 'futime' to reset the timestamps on + ;; symlinks. + #f) + (else + (utime file 0 0 0 0))))) + +;; TODO: make this canonicalize store items that are registered. This involves +;; setting permissions and timestamps, I think. Also, run a "deduplication +;; pass", whatever that involves. Also, handle databases not existing yet +;; (what should the default behavior be? Figuring out how the C++ stuff +;; currently does it sounds like a lot of grepping for global +;; variables...). Also, return #t on success like the documentation says we +;; should. + +(define* (register-path path + #:key (references '()) deriver prefix + state-directory (deduplicate? #t)) + ;; Priority for options: first what is given, then environment variables, + ;; then defaults. %state-directory, %store-directory, and + ;; %store-database-directory already handle the "environment variables / + ;; defaults" question, so we only need to choose between what is given and + ;; those. + "Register PATH as a valid store file, with REFERENCES as its list of +references, and DERIVER as its deriver (.drv that led to it.) If PREFIX is +given, it must be the name of the directory containing the new store to +initialize; if STATE-DIRECTORY is given, it must be a string containing the +absolute file name to the state directory of the store being initialized. +Return #t on success. + +Use with care as it directly modifies the store! This is primarily meant to +be used internally by the daemon's build hook." + (let* ((db-dir (cond + (state-directory + (string-append state-directory "/db")) + (prefix + ;; If prefix is specified, the value of NIX_STATE_DIR + ;; (which affects %state-directory) isn't supposed to + ;; affect db-dir, only the compile-time-customized + ;; default should. + (string-append prefix %localstatedir "/guix/db")) + (else + %store-database-directory))) + (store-dir (if prefix + ;; same situation as above + (string-append prefix %storedir) + %store-directory)) + (to-register (if prefix + (string-append %storedir "/" (basename path)) + ;; note: we assume here that if path is, for + ;; example, /foo/bar/gnu/store/thing.txt and prefix + ;; isn't given, then an environment variable has + ;; been used to change the store directory to + ;; /foo/bar/gnu/store, since otherwise real-path + ;; would end up being /gnu/store/thing.txt, which is + ;; probably not the right file in this case. + path)) + (real-path (string-append store-dir "/" (basename path)))) + (let-values (((hash nar-size) + (nar-sha256 real-path))) + (reset-timestamps real-path) + (sqlite-register + #:db-file (string-append db-dir "/db.sqlite") + #:path to-register + #:references references + #:deriver deriver + #:hash (string-append "sha256:" + (bytevector->base16-string hash)) + #:nar-size nar-size) + + (when deduplicate? + (deduplicate real-path hash #:store store-dir))))) diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm new file mode 100644 index 0000000000..4b4ac01f64 --- /dev/null +++ b/guix/store/deduplication.scm @@ -0,0 +1,148 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org> +;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org> +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. + +;;; This houses stuff we do to files when they arrive at the store - resetting +;;; timestamps, deduplicating, etc. + +(define-module (guix store deduplication) + #:use-module (guix hash) + #:use-module (guix build utils) + #:use-module (guix base16) + #:use-module (srfi srfi-11) + #:use-module (rnrs io ports) + #:use-module (ice-9 ftw) + #:use-module (guix serialization) + #:export (nar-sha256 + deduplicate)) + +;; Would it be better to just make WRITE-FILE give size as well? I question +;; the general utility of this approach. +(define (counting-wrapper-port output-port) + "Some custom ports don't implement GET-POSITION at all. But if we want to +figure out how many bytes are being written, we will want to use that. So this +makes a wrapper around a port which implements GET-POSITION." + (let ((byte-count 0)) + (make-custom-binary-output-port "counting-wrapper" + (lambda (bytes offset count) + (set! byte-count + (+ byte-count count)) + (put-bytevector output-port bytes + offset count) + count) + (lambda () + byte-count) + #f + (lambda () + (close-port output-port))))) + +(define (nar-sha256 file) + "Gives the sha256 hash of a file and the size of the file in nar form." + (let-values (((port get-hash) (open-sha256-port))) + (let ((wrapper (counting-wrapper-port port))) + (write-file file wrapper) + (force-output wrapper) + (force-output port) + (let ((hash (get-hash)) + (size (port-position wrapper))) + (close-port wrapper) + (values hash size))))) + +(define (tempname-in directory) + "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be +unused by the time you create anything with that name, but a good shot." + (let ((const-part (string-append directory "/.tmp-link-" + (number->string (getpid))))) + (let try ((guess-part + (number->string (random most-positive-fixnum) 16))) + (if (file-exists? (string-append const-part "-" guess-part)) + (try (number->string (random most-positive-fixnum) 16)) + (string-append const-part "-" guess-part))))) + +(define* (get-temp-link target #:optional (link-prefix (dirname target))) + "Like mkstemp!, but instead of creating a new file and giving you the name, +it creates a new hardlink to TARGET and gives you the name. Since +cross-filesystem hardlinks don't work, the temp link must be created on the +same filesystem - where in that filesystem it is can be controlled by +LINK-PREFIX." + (let try ((tempname (tempname-in link-prefix))) + (catch 'system-error + (lambda () + (link target tempname) + tempname) + (lambda (args) + (if (= (system-error-errno args) EEXIST) + (try (tempname-in link-prefix)) + (throw 'system-error args)))))) + +;; There are 3 main kinds of errors we can get from hardlinking: "Too many +;; things link to this" (EMLINK), "this link already exists" (EEXIST), and +;; "can't fit more stuff in this directory" (ENOSPC). + +(define (replace-with-link target to-replace) + "Atomically replace the file TO-REPLACE with a link to TARGET. Note: TARGET +and TO-REPLACE must be on the same file system." + (let ((temp-link (get-temp-link target (dirname to-replace)))) + (rename-file temp-link to-replace))) + +(define-syntax-rule (false-if-system-error (errors ...) exp ...) + "Given ERRORS, a list of system error codes to ignore, evaluates EXP... and +return #f if any of the system error codes in the given list are thrown." + (catch 'system-error + (lambda () + exp ...) + (lambda args + (if (member (system-error-errno args) (list errors ...)) + #f + (apply throw args))))) + +(define* (deduplicate path hash #:key (store %store-directory)) + "Check if a store item with sha256 hash HASH already exists. If so, +replace PATH with a hardlink to the already-existing one. If not, register +PATH so that future duplicates can hardlink to it. PATH is assumed to be +under STORE." + (let* ((links-directory (string-append store "/.links")) + (link-file (string-append links-directory "/" + (bytevector->base16-string hash)))) + (mkdir-p links-directory) + (if (file-is-directory? path) + ;; Can't hardlink directories, so hardlink their atoms. + (for-each (lambda (file) + (unless (member file '("." "..")) + (deduplicate file (nar-sha256 file) + #:store store))) + (scandir path)) + (if (file-exists? link-file) + (false-if-system-error (EMLINK) + (replace-with-link link-file path)) + (catch 'system-error + (lambda () + (link path link-file)) + (lambda args + (let ((errno (system-error-errno args))) + (cond ((= errno EEXIST) + ;; Someone else put an entry for PATH in + ;; LINKS-DIRECTORY before we could. Let's use it. + (false-if-system-error (EMLINK) + (replace-with-link path link-file))) + ((= errno ENOSPC) + ;; There's not enough room in the directory index for + ;; more entries in .links, but that's fine: we can + ;; just stop. + #f) + (else (apply throw args)))))))))) |