summaryrefslogtreecommitdiff
path: root/guix/import/cran.scm
diff options
context:
space:
mode:
Diffstat (limited to 'guix/import/cran.scm')
-rw-r--r--guix/import/cran.scm188
1 files changed, 188 insertions, 0 deletions
diff --git a/guix/import/cran.scm b/guix/import/cran.scm
new file mode 100644
index 0000000000..8ed5e5407f
--- /dev/null
+++ b/guix/import/cran.scm
@@ -0,0 +1,188 @@
+;;; GNU Guix --- Functional package management for GNU
+;;; Copyright © 2015 Ricardo Wurmus <rekado@elephly.net>
+;;;
+;;; This file is part of GNU Guix.
+;;;
+;;; GNU Guix is free software; you can redistribute it and/or modify it
+;;; under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; GNU Guix is distributed in the hope that it will be useful, but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (guix import cran)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 regex)
+ #:use-module (srfi srfi-1)
+ #:use-module (sxml simple)
+ #:use-module (sxml match)
+ #:use-module (sxml xpath)
+ #:use-module (guix http-client)
+ #:use-module (guix hash)
+ #:use-module (guix store)
+ #:use-module (guix base32)
+ #:use-module ((guix download) #:select (download-to-store))
+ #:use-module (guix import utils)
+ #:export (cran->guix-package))
+
+;;; Commentary:
+;;;
+;;; Generate a package declaration template for the latest version of an R
+;;; package on CRAN, using the HTML description downloaded from
+;;; cran.r-project.org.
+;;;
+;;; Code:
+
+(define string->license
+ (match-lambda
+ ("AGPL-3" 'agpl3+)
+ ("Artistic-2.0" 'artistic2.0)
+ ("Apache License 2.0" 'asl2.0)
+ ("BSD_2_clause" 'bsd-2)
+ ("BSD_3_clause" 'bsd-3)
+ ("GPL-2" 'gpl2+)
+ ("GPL-3" 'gpl3+)
+ ("LGPL-2" 'lgpl2.0+)
+ ("LGPL-2.1" 'lgpl2.1+)
+ ("LGPL-3" 'lgpl3+)
+ ("MIT" 'x11)
+ ((x) (string->license x))
+ ((lst ...) `(list ,@(map string->license lst)))
+ (_ #f)))
+
+(define (format-inputs names)
+ "Generate a sorted list of package inputs from a list of package NAMES."
+ (map (lambda (name)
+ (list name (list 'unquote (string->symbol name))))
+ (sort names string-ci<?)))
+
+(define* (maybe-inputs package-inputs #:optional (type 'inputs))
+ "Given a list of PACKAGE-INPUTS, tries to generate the TYPE field of a
+package definition."
+ (match package-inputs
+ (()
+ '())
+ ((package-inputs ...)
+ `((,type (,'quasiquote ,(format-inputs package-inputs)))))))
+
+(define (table-datum tree label)
+ "Extract the datum node following a LABEL in the sxml table TREE. Only the
+first cell of a table row is considered a label cell."
+ ((node-pos 1)
+ ((sxpath `(xhtml:tr
+ (xhtml:td 1) ; only first cell can contain label
+ (equal? ,label)
+ ,(node-parent tree) ; go up to label cell
+ ,(node-parent tree) ; go up to matching row
+ (xhtml:td 2))) ; select second cell
+ tree)))
+
+(define %cran-url "http://cran.r-project.org/web/packages/")
+
+(define (cran-fetch name)
+ "Return an sxml representation of the CRAN page for the R package NAME,
+or #f on failure. NAME is case-sensitive."
+ ;; This API always returns the latest release of the module.
+ (let ((cran-url (string-append %cran-url name)))
+ (false-if-exception
+ (xml->sxml (http-fetch cran-url)
+ #:trim-whitespace? #t
+ #:namespaces '((xhtml . "http://www.w3.org/1999/xhtml"))
+ #:default-entity-handler
+ (lambda (port name)
+ (case name
+ ((nbsp) " ")
+ ((ge) ">=")
+ ((gt) ">")
+ ((lt) "<")
+ (else
+ (format (current-warning-port)
+ "~a:~a:~a: undefined entitity: ~a\n"
+ cran-url (port-line port) (port-column port)
+ name)
+ (symbol->string name))))))))
+
+(define (cran-sxml->sexp sxml)
+ "Return the `package' s-expression for a CRAN package from the SXML
+representation of the package page."
+ (define (nodes->text nodeset)
+ (string-join ((sxpath '(// *text*)) nodeset) " "))
+
+ (define (guix-name name)
+ (if (string-prefix? "r-" name)
+ (string-downcase name)
+ (string-append "r-" (string-downcase name))))
+
+ (sxml-match-let*
+ (((*TOP* (xhtml:html
+ ,head
+ (xhtml:body
+ (xhtml:h2 ,name-and-synopsis)
+ (xhtml:p ,description)
+ ,summary
+ (xhtml:h4 "Downloads:") ,downloads
+ . ,rest)))
+ sxml))
+ (let* ((name (match:prefix (string-match ": " name-and-synopsis)))
+ (synopsis (match:suffix (string-match ": " name-and-synopsis)))
+ (version (nodes->text (table-datum summary "Version:")))
+ (license ((compose string->license nodes->text)
+ (table-datum summary "License:")))
+ (home-page (nodes->text ((sxpath '((xhtml:a 1)))
+ (table-datum summary "URL:"))))
+ (source-url (string-append "mirror://cran/"
+ ;; Remove double dots, because we want an
+ ;; absolute path.
+ (regexp-substitute/global
+ #f "\\.\\./"
+ (string-join
+ ((sxpath '((xhtml:a 1) @ href *text*))
+ (table-datum downloads
+ " Package source: ")))
+ 'pre 'post)))
+ (tarball (with-store store (download-to-store store source-url)))
+ (sysdepends (map match:substring
+ (list-matches
+ "[^ ]+"
+ ;; Strip off comma and parenthetical
+ ;; expressions.
+ (regexp-substitute/global
+ #f "(,|\\([^\\)]+\\))"
+ (nodes->text (table-datum summary
+ "SystemRequirements:"))
+ 'pre 'post))))
+ (imports (map guix-name
+ ((sxpath '(// xhtml:a *text*))
+ (table-datum summary "Imports:")))))
+ `(package
+ (name ,(guix-name name))
+ (version ,version)
+ (source (origin
+ (method url-fetch)
+ (uri (string-append ,@(factorize-uri source-url version)))
+ (sha256
+ (base32
+ ,(bytevector->nix-base32-string (file-sha256 tarball))))))
+ (build-system r-build-system)
+ ,@(maybe-inputs sysdepends)
+ ,@(maybe-inputs imports 'propagated-inputs)
+ (home-page ,(if (string-null? home-page)
+ (string-append %cran-url name)
+ home-page))
+ (synopsis ,synopsis)
+ ;; Use double spacing
+ (description ,(regexp-substitute/global #f "\\. \\b" description
+ 'pre ". " 'post))
+ (license ,license)))))
+
+(define (cran->guix-package package-name)
+ "Fetch the metadata for PACKAGE-NAME from cran.r-project.org, and return the
+`package' s-expression corresponding to that package, or #f on failure."
+ (let ((module-meta (cran-fetch package-name)))
+ (and=> module-meta cran-sxml->sexp)))