From c3926120ba5ea76f531541b7926ccb6739575e0e Mon Sep 17 00:00:00 2001 From: Roel Janssen Date: Fri, 16 Apr 2021 13:20:53 +0200 Subject: gnu: Update blast+ to 2.11.0. * gnu/packages/bioinformatics.scm (blast+): Update to 2.11.0. --- gnu/packages/bioinformatics.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 41ef4cd513..31205c473a 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -1363,7 +1363,7 @@ package provides command line tools using the Bio++ library.") (define-public blast+ (package (name "blast+") - (version "2.10.1") + (version "2.11.0") (source (origin (method url-fetch) (uri (string-append @@ -1371,7 +1371,7 @@ package provides command line tools using the Bio++ library.") version "/ncbi-blast-" version "+-src.tar.gz")) (sha256 (base32 - "11kvrrl0mcwww6530r55hccpg3x3msmhr3051fwnjbq8rzg2j1qi")) + "0m0r9vkw631ky1za1wilsfk9k9spwqh22nkrb9a57rbwmrc1i3nq")) (modules '((guix build utils))) (snippet '(begin -- cgit v1.2.3 From 92032d8081713d56c3bcf84c7e15b55a22a1e0a3 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Thu, 22 Apr 2021 08:14:51 +0200 Subject: gnu: Add kraken2. * gnu/packages/bioinformatics.scm (kraken2): New variable. --- gnu/packages/bioinformatics.scm | 82 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 31205c473a..cf09fc3add 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -144,6 +144,7 @@ #:use-module (gnu packages tls) #:use-module (gnu packages vim) #:use-module (gnu packages web) + #:use-module (gnu packages wget) #:use-module (gnu packages xml) #:use-module (gnu packages xorg) #:use-module (srfi srfi-1) @@ -15061,6 +15062,87 @@ signaling, and more. It continues to be evolved and expanded by an international community.") (license license:lgpl2.1+))) +(define-public kraken2 + (package + (name "kraken2") + (version "2.1.1") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/DerrickWood/kraken2") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0h7a7vygd7y5isbrnc6srwq6xj1rmyd33pm8mmcgfkmlxlg5vkg3")))) + (build-system gnu-build-system) + (arguments + `(#:tests? #false ; there are none + #:make-flags (list "-C" "src" + (string-append "KRAKEN2_DIR=" + (assoc-ref %outputs "out") "/bin")) + #:phases + (modify-phases %standard-phases + (delete 'configure) + (add-before 'install 'install-scripts + (lambda* (#:key outputs #:allow-other-keys) + (let* ((bin (string-append (assoc-ref outputs "out") "/bin")) + (scripts (find-files "scripts" ".*")) + (replacements `(("KRAKEN2_DIR" . ,bin) + ("VERSION" . ,,version)))) + (mkdir-p bin) + (substitute* scripts + (("#####=([^=]+)=#####" _ key) + (or (assoc-ref replacements key) + (error (format #false "unknown key: ~a~%" key))))) + (substitute* "scripts/kraken2" + (("compression_program = \"bzip2\"") + (string-append "compression_program = \"" + (which "bzip2") + "\"")) + (("compression_program = \"gzip\"") + (string-append "compression_program = \"" + (which "gzip") + "\""))) + (substitute* '("scripts/download_genomic_library.sh" + "scripts/download_taxonomy.sh" + "scripts/16S_gg_installation.sh" + "scripts/16S_silva_installation.sh" + "scripts/16S_rdp_installation.sh") + (("wget") (which "wget"))) + (substitute* "scripts/mask_low_complexity.sh" + (("which") (which "which"))) + (substitute* '("scripts/mask_low_complexity.sh" + "scripts/download_genomic_library.sh" + "scripts/16S_silva_installation.sh") + (("sed -e ") + (string-append (which "sed") " -e "))) + (substitute* '("scripts/rsync_from_ncbi.pl" + "scripts/16S_rdp_installation.sh" + "scripts/16S_silva_installation.sh" + "scripts/16S_gg_installation.sh" + "scripts/download_taxonomy.sh" + "scripts/download_genomic_library.sh") + (("gunzip") (which "gunzip"))) + (for-each (lambda (script) + (chmod script #o555) + (install-file script bin)) + scripts))))))) + (inputs + `(("gzip" ,gzip) + ("perl" ,perl) + ("sed" ,sed) + ("wget" ,wget) + ("which" ,which))) + (home-page "https://github.com/DerrickWood/kraken2") + (synopsis "Taxonomic sequence classification system") + (description "Kraken is a taxonomic sequence classifier that assigns +taxonomic labels to DNA sequences. Kraken examines the k-mers within a query +sequence and uses the information within those k-mers to query a +database. That database maps k-mers to the lowest common ancestor (LCA) of all +genomes known to contain a given k-mer.") + (license license:expat))) + (define-public r-signac (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c") (revision "1")) -- cgit v1.2.3 From c9371b698db76fc0ff9133820c6d795280298ed2 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Fri, 23 Apr 2021 15:05:31 +0200 Subject: gnu: Add r-shinycell. * gnu/packages/bioinformatics.scm (r-shinycell): New variable. --- gnu/packages/bioinformatics.scm | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index cf09fc3add..a3dd76c3bd 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -7250,6 +7250,43 @@ clustering analysis, differential analysis, motif inference and exploration of single cell ATAC-seq sequencing data.") (license license:gpl3))) +(define-public r-shinycell + (let ((commit + "aecbd56e66802f28e397f5ae1f19403aadd12163") + (revision "1")) + (package + (name "r-shinycell") + (version (git-version "2.0.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/SGDDNB/ShinyCell") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "13jn2ikmvljnzayk485g1mmq5abcp9m1b8n1djdb1agmn83zaki5")))) + (properties `((upstream-name . "ShinyCell"))) + (build-system r-build-system) + (propagated-inputs + `(("r-data-table" ,r-data-table) + ("r-ggplot2" ,r-ggplot2) + ("r-glue" ,r-glue) + ("r-gridextra" ,r-gridextra) + ("r-hdf5r" ,r-hdf5r) + ("r-matrix" ,r-matrix) + ("r-r-utils" ,r-r-utils) + ("r-rcolorbrewer" ,r-rcolorbrewer) + ("r-readr" ,r-readr) + ("r-reticulate" ,r-reticulate))) + (home-page "https://github.com/SGDDNB/ShinyCell") + (synopsis "Shiny interactive web apps for single-cell data") + (description + "This package provides Shiny apps for interactive exploration of +single-cell data.") + (license license:gpl3)))) + (define-public r-archr (let ((commit "46b519ffb6f73edf132497ac31650d19ef055dc1") (revision "1")) -- cgit v1.2.3 From f1f17c7bba778ec03a5e40a34591564684461df5 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Fri, 23 Apr 2021 15:52:36 +0200 Subject: gnu: Add shorah. * gnu/packages/bioinformatics.scm (shorah): New variable. --- gnu/packages/bioinformatics.scm | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index a3dd76c3bd..0aaf30872e 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -7601,6 +7601,64 @@ Perl and can be helpful if you want to filter, reformat, or trim your sequence data. It also generates basic statistics for your sequences.") (license license:gpl3+))) +(define-public shorah + (package + (name "shorah") + (version "1.99.2") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/cbg-ethz/shorah" + "/releases/download/v" version + "/shorah-" version ".tar.xz")) + (sha256 + (base32 + "158dir9qcqspknlnyfr9zwk41x48nrh5wcg10k2grh9cidp9daiq")))) + (build-system gnu-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'fix-test-wrapper + (lambda* (#:key outputs #:allow-other-keys) + (let ((bin (string-append (assoc-ref outputs "out") "/bin"))) + (substitute* "examples/run_end2end_test" + (("\\$\\{interpreter\\} ../\\$\\{testscript\\}") + (string-append bin "/${testscript}")))))) + (delete 'check) + (add-after 'install 'wrap-programs + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (site (string-append + out "/lib/python" + ,(version-major+minor + (package-version python)) + "/site-packages")) + (pythonpath (getenv "PYTHONPATH")) + (script (string-append out "/bin/shorah"))) + (chmod script #o555) + (wrap-program script `("PYTHONPATH" ":" prefix (,site ,pythonpath)))))) + (add-after 'wrap-programs 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (invoke "make" "check"))))))) + (inputs + `(("boost" ,boost) + ("htslib" ,htslib) + ("python" ,python) + ("python-biopython" ,python-biopython) + ("python-numpy" ,python-numpy) + ("zlib" ,zlib))) + (native-inputs + `(("pkg-config" ,pkg-config))) + (home-page "") + (synopsis "Short reads assembly into haplotypes") + (description + "ShoRAH is a project for the analysis of next generation sequencing data. +It is designed to analyse genetically heterogeneous samples. Its tools +provide error correction, haplotype reconstruction and estimation of the +frequency of the different genetic variants present in a mixed sample.") + (license license:gpl3+))) + (define-public ruby-bio-kseq (package (name "ruby-bio-kseq") -- cgit v1.2.3 From 76fb1a5cf29c3ecf189776eb16db4fcb89bb1939 Mon Sep 17 00:00:00 2001 From: Vinicius Monego Date: Sun, 25 Apr 2021 03:26:05 +0000 Subject: gnu: Add python-louvain 0.15. * gnu/packages/graph.scm (python-louvain): Rename variable to python-louvain-0.6. (python-louvain): New variable. * gnu/packages/bioinformatics.scm (python-scanpy)[propagated-inputs]: Use python-louvain-0.6. Co-authored-by: Leo Famulari --- gnu/packages/bioinformatics.scm | 2 +- gnu/packages/graph.scm | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 0aaf30872e..b6e7249980 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -12586,7 +12586,7 @@ in RNA-seq data.") ("python-igraph" ,python-igraph) ("python-joblib" ,python-joblib) ("python-legacy-api-wrap" ,python-legacy-api-wrap) - ("python-louvain" ,python-louvain) + ("python-louvain" ,python-louvain-0.6) ("python-matplotlib" ,python-matplotlib) ("python-natsort" ,python-natsort) ("python-networkx" ,python-networkx) diff --git a/gnu/packages/graph.scm b/gnu/packages/graph.scm index d2e4c875a1..95506c69a9 100644 --- a/gnu/packages/graph.scm +++ b/gnu/packages/graph.scm @@ -6,6 +6,7 @@ ;;; Copyright © 2019 Andreas Enge ;;; Copyright © 2020 Alexander Krotov ;;; Copyright © 2020 Pierre Langlois +;;; Copyright © 2021 Vinicius Monego ;;; ;;; This file is part of GNU Guix. ;;; @@ -229,6 +230,27 @@ subplots, multiple-axes, polar charts, and bubble charts. ") (package-with-python2 python-plotly-2.4.1)) (define-public python-louvain + (package + (name "python-louvain") + (version "0.15") + (source + (origin + (method url-fetch) + (uri (pypi-uri "python-louvain" version)) + (sha256 + (base32 "1sqp97fwh4asx0jr72x8hil8z8fcg2xq92jklmh2m599pvgnx19a")))) + (build-system python-build-system) + (propagated-inputs + `(("python-networkx" ,python-networkx) + ("python-numpy" ,python-numpy))) + (home-page "https://github.com/taynaud/python-louvain") + (synopsis "Louvain algorithm for community detection") + (description + "This package provides a pure Python implementation of the Louvain +algorithm for community detection in large networks.") + (license license:bsd-3))) + +(define-public python-louvain-0.6 (package (name "python-louvain") (version "0.6.1") -- cgit v1.2.3 From 5ea5ea60669a398c5c7eaf4cc23f1ec315eb3a3d Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Wed, 28 Apr 2021 10:49:03 +0200 Subject: gnu: Add python-pyliftover. * gnu/packages/bioinformatics.scm (python-pyliftover): New variable. --- gnu/packages/bioinformatics.scm | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index b6e7249980..97f3f916c8 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -15238,6 +15238,31 @@ database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer.") (license license:expat))) +(define-public python-pyliftover + (package + (name "python-pyliftover") + (version "0.4") + ;; The version of pypi does not include test data. + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/konstantint/pyliftover") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1j8jp9iynv2l3jv5pr0pn0p3azlama1bqg233piglzm6bqh3m2m3")))) + (build-system python-build-system) + (arguments `(#:tests? #false)) ; the tests access the web + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "https://github.com/konstantint/pyliftover") + (synopsis "Python implementation of UCSC liftOver genome coordinate conversion") + (description + "PyLiftover is a library for quick and easy conversion of genomic (point) +coordinates between different assemblies.") + (license license:expat))) + (define-public r-signac (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c") (revision "1")) -- cgit v1.2.3 From f01b3cd0632d3ef7e609625e0dd32028a5c930d7 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Wed, 28 Apr 2021 14:48:39 +0200 Subject: gnu: kraken2: Replace references to rsync. * gnu/packages/bioinformatics.scm (kraken2)[inputs]: Add rsync. [arguments]: Replace references to rsync; use WITH-DIRECTORY-EXCURSION to avoid repeating "scripts". --- gnu/packages/bioinformatics.scm | 85 +++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 38 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 97f3f916c8..92c0692f45 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -130,6 +130,7 @@ #:use-module (gnu packages qt) #:use-module (gnu packages rdf) #:use-module (gnu packages readline) + #:use-module (gnu packages rsync) #:use-module (gnu packages ruby) #:use-module (gnu packages serialization) #:use-module (gnu packages shells) @@ -15182,50 +15183,58 @@ international community.") (add-before 'install 'install-scripts (lambda* (#:key outputs #:allow-other-keys) (let* ((bin (string-append (assoc-ref outputs "out") "/bin")) - (scripts (find-files "scripts" ".*")) (replacements `(("KRAKEN2_DIR" . ,bin) ("VERSION" . ,,version)))) (mkdir-p bin) - (substitute* scripts - (("#####=([^=]+)=#####" _ key) - (or (assoc-ref replacements key) - (error (format #false "unknown key: ~a~%" key))))) - (substitute* "scripts/kraken2" - (("compression_program = \"bzip2\"") - (string-append "compression_program = \"" - (which "bzip2") - "\"")) - (("compression_program = \"gzip\"") - (string-append "compression_program = \"" - (which "gzip") - "\""))) - (substitute* '("scripts/download_genomic_library.sh" - "scripts/download_taxonomy.sh" - "scripts/16S_gg_installation.sh" - "scripts/16S_silva_installation.sh" - "scripts/16S_rdp_installation.sh") - (("wget") (which "wget"))) - (substitute* "scripts/mask_low_complexity.sh" - (("which") (which "which"))) - (substitute* '("scripts/mask_low_complexity.sh" - "scripts/download_genomic_library.sh" - "scripts/16S_silva_installation.sh") - (("sed -e ") - (string-append (which "sed") " -e "))) - (substitute* '("scripts/rsync_from_ncbi.pl" - "scripts/16S_rdp_installation.sh" - "scripts/16S_silva_installation.sh" - "scripts/16S_gg_installation.sh" - "scripts/download_taxonomy.sh" - "scripts/download_genomic_library.sh") - (("gunzip") (which "gunzip"))) - (for-each (lambda (script) - (chmod script #o555) - (install-file script bin)) - scripts))))))) + + (with-directory-excursion "scripts" + (let ((scripts (find-files "." ".*"))) + (substitute* scripts + (("#####=([^=]+)=#####" _ key) + (or (assoc-ref replacements key) + (error (format #false "unknown key: ~a~%" key))))) + (substitute* "kraken2" + (("compression_program = \"bzip2\"") + (string-append "compression_program = \"" + (which "bzip2") + "\"")) + (("compression_program = \"gzip\"") + (string-append "compression_program = \"" + (which "gzip") + "\""))) + (substitute* '("download_genomic_library.sh" + "download_taxonomy.sh" + "16S_gg_installation.sh" + "16S_silva_installation.sh" + "16S_rdp_installation.sh") + (("wget") (which "wget"))) + (substitute* '("download_taxonomy.sh" + "download_genomic_library.sh" + "rsync_from_ncbi.pl") + (("rsync -") + (string-append (which "rsync") " -"))) + (substitute* "mask_low_complexity.sh" + (("which") (which "which"))) + (substitute* '("mask_low_complexity.sh" + "download_genomic_library.sh" + "16S_silva_installation.sh") + (("sed -e ") + (string-append (which "sed") " -e "))) + (substitute* '("rsync_from_ncbi.pl" + "16S_rdp_installation.sh" + "16S_silva_installation.sh" + "16S_gg_installation.sh" + "download_taxonomy.sh" + "download_genomic_library.sh") + (("gunzip") (which "gunzip"))) + (for-each (lambda (script) + (chmod script #o555) + (install-file script bin)) + scripts))))))))) (inputs `(("gzip" ,gzip) ("perl" ,perl) + ("rsync" ,rsync) ("sed" ,sed) ("wget" ,wget) ("which" ,which))) -- cgit v1.2.3 From 01e33a031e493477d930b9383d397fea012a3b1a Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Wed, 28 Apr 2021 17:41:08 +0200 Subject: gnu: Add lofreq. * gnu/packages/bioinformatics.scm (lofreq): New variable. --- gnu/packages/bioinformatics.scm | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 92c0692f45..9b819d1a23 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -15247,6 +15247,40 @@ database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer.") (license license:expat))) +(define-public lofreq + (package + (name "lofreq") + (version "2.1.5") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/CSB5/lofreq") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0qssrn3mgjak7df6iqc1rljqd3g3a5syvg0lsv4vds43s3fq23bl")))) + (build-system gnu-build-system) + (arguments + '(#:test-target "bug-tests" + #:tests? #false)) ; test data are not included + (inputs + `(("htslib" ,htslib) + ("python" ,python-wrapper) + ("zlib" ,zlib))) + (native-inputs + `(("autoconf" ,autoconf) + ("automake" ,automake) + ("which" ,which))) + (home-page "https://csb5.github.io/lofreq/") + (synopsis "Sensitive variant calling from sequencing data ") + (description "LoFreq is a fast and sensitive variant-caller for inferring +SNVs and indels from next-generation sequencing data. It makes full use of +base-call qualities and other sources of errors inherent in +sequencing (e.g. mapping or base/indel alignment uncertainty), which are +usually ignored by other methods or only used for filtering.") + (license license:expat))) + (define-public python-pyliftover (package (name "python-pyliftover") -- cgit v1.2.3 From e7707d5262908b916b905851d662e446362032f2 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Wed, 28 Apr 2021 21:57:02 +0200 Subject: gnu: pigx-bsseq: Update to 0.1.3. * gnu/packages/bioinformatics.scm (pigx-bsseq): Update to 0.1.3. [inputs]: Add r-ggbio, r-matrixstats, and r-reshape2. --- gnu/packages/bioinformatics.scm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 9b819d1a23..1847341b26 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -11950,7 +11950,7 @@ in an easily configurable manner.") (define-public pigx-bsseq (package (name "pigx-bsseq") - (version "0.1.2") + (version "0.1.3") (source (origin (method url-fetch) (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/" @@ -11958,7 +11958,7 @@ in an easily configurable manner.") "/pigx_bsseq-" version ".tar.gz")) (sha256 (base32 - "0mpzlay2d5cjpmrcp7knff6rg1c2mqszd638n7lw0mc0cycbp9f8")))) + "0blm0bl5z3ng01n7hh2ffk4rkzvf7vb3nm0crgdzrxr5cahxdxql")))) (build-system gnu-build-system) (arguments `(;; TODO: tests currently require 12+GB of RAM. See @@ -11984,8 +11984,11 @@ in an easily configurable manner.") ("r-annotationhub" ,r-annotationhub) ("r-dt" ,r-dt) ("r-genomation" ,r-genomation) + ("r-ggbio" ,r-ggbio) ("r-ggrepel" ,r-ggrepel) + ("r-matrixstats" ,r-matrixstats) ("r-methylkit" ,r-methylkit) + ("r-reshape2" ,r-reshape2) ("r-rtracklayer" ,r-rtracklayer) ("r-rmarkdown" ,r-rmarkdown) ("r-bookdown" ,r-bookdown) -- cgit v1.2.3 From 241994230ba0f3ba666b20e86bdd7130e234e797 Mon Sep 17 00:00:00 2001 From: Roel Janssen Date: Fri, 16 Apr 2021 11:49:49 +0200 Subject: gnu: htslib: Update to 1.12. * gnu/packages/bioinformatics.scm (htslib): Update to 1.12. --- gnu/packages/bioinformatics.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 1847341b26..1183f78c53 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -4478,7 +4478,7 @@ performance.") (define-public htslib (package (name "htslib") - (version "1.11") + (version "1.12") (source (origin (method url-fetch) (uri (string-append @@ -4486,7 +4486,7 @@ performance.") version "/htslib-" version ".tar.bz2")) (sha256 (base32 - "1mrq4mihzx37yqhj3sfz6da6mw49niia808bzsw2gkkgmadxvyng")))) + "1jplnvizgr0fyyvvmkfmnsywrrpqhid3760vw15bllz98qdi9012")))) (build-system gnu-build-system) ;; Let htslib translate "gs://" and "s3://" to regular https links with ;; "--enable-gcs" and "--enable-s3". For these options to work, we also -- cgit v1.2.3 From b904bb89a0961c17d66c83a002cb3b0904432d8e Mon Sep 17 00:00:00 2001 From: Roel Janssen Date: Fri, 16 Apr 2021 11:50:53 +0200 Subject: gnu: Update bcftools to 1.12. * gnu/packages/bioinformatics.scm (bcftools): Update to 1.12. --- gnu/packages/bioinformatics.scm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 1183f78c53..e8e6e9dfba 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -329,7 +329,7 @@ BAM files.") (define-public bcftools (package (name "bcftools") - (version "1.11") + (version "1.12") (source (origin (method url-fetch) (uri (string-append "https://github.com/samtools/bcftools/" @@ -337,11 +337,11 @@ BAM files.") version "/bcftools-" version ".tar.bz2")) (sha256 (base32 - "0r508mp15pqzf8r1269kb4v5naw9zsvbwd3cz8s1yj7carsf9viw")) + "1x94l1hy2pi3lbz0sxlbw0g6q5z5apcrhrlcwda94ns9n4r6a3ks")) (modules '((guix build utils))) (snippet '(begin ;; Delete bundled htslib. - (delete-file-recursively "htslib-1.11") + (delete-file-recursively "htslib-1.12") #t)))) (build-system gnu-build-system) (arguments -- cgit v1.2.3 From 558d11c3a85e932312de016d978d558bf2876f5b Mon Sep 17 00:00:00 2001 From: Roel Janssen Date: Fri, 16 Apr 2021 11:51:41 +0200 Subject: gnu: Update samtools to 1.12. * gnu/packages/bioinformatics.scm (samtools): Update to 1.12. --- gnu/packages/bioinformatics.scm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index e8e6e9dfba..bc67b27d6c 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -5903,7 +5903,7 @@ to the user's query of interest.") (define-public samtools (package (name "samtools") - (version "1.11") + (version "1.12") (source (origin (method url-fetch) @@ -5912,11 +5912,11 @@ to the user's query of interest.") version "/samtools-" version ".tar.bz2")) (sha256 (base32 - "1dp5wknak4arnw5ghhif9mmljlfnw5bgm91wib7z0j8wdjywx0z2")) + "1jrdj2idpma5ja9cg0rr73b565vdbr9wyy6zig54bidicc2pg8vd")) (modules '((guix build utils))) (snippet '(begin ;; Delete bundled htslib. - (delete-file-recursively "htslib-1.11") + (delete-file-recursively "htslib-1.12") #t)))) (build-system gnu-build-system) (arguments -- cgit v1.2.3 From 7540f6915c0ea04412ffdc8d70acd7b063251e74 Mon Sep 17 00:00:00 2001 From: Roel Janssen Date: Fri, 16 Apr 2021 11:52:16 +0200 Subject: gnu: Update bedtools to 2.30.0. * gnu/packages/bioinformatics.scm (bedtools): Update to 2.30.0, [inputs]: Use latest samtools. --- gnu/packages/bioinformatics.scm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index bc67b27d6c..83ebfc2d8f 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -455,7 +455,7 @@ computational cluster.") (define-public bedtools (package (name "bedtools") - (version "2.29.2") + (version "2.30.0") (source (origin (method url-fetch) (uri (string-append "https://github.com/arq5x/bedtools2/releases/" @@ -463,7 +463,7 @@ computational cluster.") "bedtools-" version ".tar.gz")) (sha256 (base32 - "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3")))) + "1f2hh79l7dn147c2xyfgf5wfjvlqfw32kjfnnh2n1qy6rpzx2fik")))) (build-system gnu-build-system) (arguments '(#:test-target "test" @@ -475,7 +475,7 @@ computational cluster.") (native-inputs `(("python" ,python-wrapper))) (inputs - `(("samtools" ,samtools-1.9) + `(("samtools" ,samtools) ("zlib" ,zlib))) (home-page "https://github.com/arq5x/bedtools2") (synopsis "Tools for genome analysis and arithmetic") -- cgit v1.2.3 From f09e4dd8ea6b86ecbae9c90e0316f6fa44b546d6 Mon Sep 17 00:00:00 2001 From: Roel Janssen Date: Fri, 30 Apr 2021 13:47:43 +0200 Subject: gnu: Add pbgzip. * gnu/packages/bioinformatics.scm (pbgzip): New variable. --- gnu/packages/bioinformatics.scm | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 83ebfc2d8f..cd2dae05d5 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -3,7 +3,7 @@ ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins ;;; Copyright © 2015 Andreas Enge -;;; Copyright © 2016, 2020 Roel Janssen +;;; Copyright © 2016, 2020, 2021 Roel Janssen ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner ;;; Copyright © 2016, 2020 Marius Bakke ;;; Copyright © 2016, 2018 Raoul Bonnal @@ -571,6 +571,40 @@ input and output BAMs must adhere to the PacBio BAM format specification. Non-PacBio BAMs will cause exceptions to be thrown.") (license license:bsd-3))) +(define-public pbgzip + (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974")) + (package + (name "pbgzip") + (version (git-version "0.0.0" "0" commit)) + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/nh13/pbgzip") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd")))) + (build-system gnu-build-system) + (native-inputs + `(("autoconf" ,autoconf) + ("automake" ,automake))) + (inputs + `(("zlib" ,zlib))) + (home-page "https://github.com/nh13/pbgzip") + (synopsis "Parallel Block GZIP") + (description "This package implements parallel block gzip. For many +formats, in particular genomics data formats, data are compressed in +fixed-length blocks such that they can be easily indexed based on a (genomic) +coordinate order, since typically each block is sorted according to this order. +This allows for each block to be individually compressed (deflated), or more +importantly, decompressed (inflated), with the latter enabling random retrieval +of data in large files (gigabytes to terabytes). @code{pbgzip} is not limited +to any particular format, but certain features are tailored to genomics data +formats when enabled. Parallel decompression is somewhat faster, but the true +speedup comes during compression.") + (license license:expat)))) + (define-public blasr-libcpp (package (name "blasr-libcpp") -- cgit v1.2.3 From 3b002a49d45bc83fc424f8c847ba83390b678a3e Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Tue, 4 May 2021 09:02:07 +0200 Subject: gnu: r-annotationhub: Update to 2.22.1. * gnu/packages/bioinformatics.scm (r-annotationhub): Update to 2.22.1. --- gnu/packages/bioinformatics.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index cd2dae05d5..efb74e6051 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -9666,14 +9666,14 @@ Shiny-based display methods for Bioconductor objects.") (define-public r-annotationhub (package (name "r-annotationhub") - (version "2.22.0") + (version "2.22.1") (source (origin (method url-fetch) (uri (bioconductor-uri "AnnotationHub" version)) (sha256 (base32 - "1950x654ffqx53b154kbph808zdh2xm5vmj9vzmc5nxc28fi2z5g")))) + "08d7m0n4jkpajsj0bvi5xd4vi1zqczl6lnrh8kqi2fbjkrvwdqp5")))) (properties `((upstream-name . "AnnotationHub"))) (build-system r-build-system) (propagated-inputs -- cgit v1.2.3 From 7072dc8c5368fa1dfdde3c996659fd8bb3e8cb6f Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Tue, 4 May 2021 09:02:18 +0200 Subject: gnu: r-gage: Update to 2.40.2. * gnu/packages/bioinformatics.scm (r-gage): Update to 2.40.2. --- gnu/packages/bioinformatics.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index efb74e6051..1ac004327e 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -9763,14 +9763,14 @@ microarrays or GRanges for sequencing data.") (define-public r-gage (package (name "r-gage") - (version "2.40.1") + (version "2.40.2") (source (origin (method url-fetch) (uri (bioconductor-uri "gage" version)) (sha256 (base32 - "1iawa03dy4bl333my69d4sk7d74cjzfg5dpcxga6q5dglan4sp8r")))) + "1bs0hx8sqiyl08dqn2zx31kbv5aci4xvrs71pplx2yxal3jf5178")))) (build-system r-build-system) (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi) -- cgit v1.2.3 From d4051161e7f56abeb5baa2ede91630e438b53ae8 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Tue, 4 May 2021 09:02:25 +0200 Subject: gnu: r-ensembldb: Update to 2.14.1. * gnu/packages/bioinformatics.scm (r-ensembldb): Update to 2.14.1. --- gnu/packages/bioinformatics.scm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 1ac004327e..eda7416fb2 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -9896,14 +9896,14 @@ originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2): (define-public r-ensembldb (package (name "r-ensembldb") - (version "2.14.0") + (version "2.14.1") (source (origin (method url-fetch) (uri (bioconductor-uri "ensembldb" version)) (sha256 (base32 - "04il99gcrqzakvc0bxchdp9gghkn1sp9lpiian0iz4y7r67z3wpy")))) + "1hxwfh19qafpdhzprvw4nr8ks3gz7f0y8gyfhk8yqmmvvnvgqv40")))) (build-system r-build-system) (propagated-inputs `(("r-annotationdbi" ,r-annotationdbi) -- cgit v1.2.3 From 7694acebd18d9b32fd01b70819c1e79de05b4f52 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Tue, 4 May 2021 10:11:34 +0200 Subject: gnu: r-summarizedexperiment: Move to (gnu packages bioconductor). * gnu/packages/bioinformatics.scm (r-summarizedexperiment): Move variable from here... * gnu/packages/bioconductor.scm (r-summarizedexperiment): ...to here. --- gnu/packages/bioconductor.scm | 34 ++++++++++++++++++++++++++++++++++ gnu/packages/bioinformatics.scm | 34 ---------------------------------- 2 files changed, 34 insertions(+), 34 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioconductor.scm b/gnu/packages/bioconductor.scm index 79aaf92124..9b49c6ec43 100644 --- a/gnu/packages/bioconductor.scm +++ b/gnu/packages/bioconductor.scm @@ -2395,6 +2395,40 @@ purposes. The package also contains legacy support for early single-end, ungapped alignment formats.") (license license:artistic2.0))) +(define-public r-summarizedexperiment + (package + (name "r-summarizedexperiment") + (version "1.20.0") + (source (origin + (method url-fetch) + (uri (bioconductor-uri "SummarizedExperiment" version)) + (sha256 + (base32 + "04x6d4mcsnvz6glkmf6k2cv3fs8zk03i9rvv0ahpl793n8l411ps")))) + (properties + `((upstream-name . "SummarizedExperiment"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biobase" ,r-biobase) + ("r-biocgenerics" ,r-biocgenerics) + ("r-delayedarray" ,r-delayedarray) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-iranges" ,r-iranges) + ("r-matrix" ,r-matrix) + ("r-matrixgenerics" ,r-matrixgenerics) + ("r-s4vectors" ,r-s4vectors))) + (native-inputs + `(("r-knitr" ,r-knitr))) + (home-page "https://bioconductor.org/packages/SummarizedExperiment") + (synopsis "Container for representing genomic ranges by sample") + (description + "The SummarizedExperiment container contains one or more assays, each +represented by a matrix-like object of numeric or other mode. The rows +typically represent genomic ranges of interest and the columns represent +samples.") + (license license:artistic2.0))) + (define-public r-systempiper (package (name "r-systempiper") diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index eda7416fb2..de76e67f57 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -7838,40 +7838,6 @@ including VCF header and contents in RDF and JSON.") (home-page "https://github.com/vcflib/bio-vcf") (license license:expat))) -(define-public r-summarizedexperiment - (package - (name "r-summarizedexperiment") - (version "1.20.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "SummarizedExperiment" version)) - (sha256 - (base32 - "04x6d4mcsnvz6glkmf6k2cv3fs8zk03i9rvv0ahpl793n8l411ps")))) - (properties - `((upstream-name . "SummarizedExperiment"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-delayedarray" ,r-delayedarray) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-matrix" ,r-matrix) - ("r-matrixgenerics" ,r-matrixgenerics) - ("r-s4vectors" ,r-s4vectors))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/SummarizedExperiment") - (synopsis "Container for representing genomic ranges by sample") - (description - "The SummarizedExperiment container contains one or more assays, each -represented by a matrix-like object of numeric or other mode. The rows -typically represent genomic ranges of interest and the columns represent -samples.") - (license license:artistic2.0))) - (define-public r-genomicalignments (package (name "r-genomicalignments") -- cgit v1.2.3 From da7f89c73780185ae497aabc77a76cb0d81adc33 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Wed, 5 May 2021 16:18:13 +0200 Subject: gnu: python-pybedtools: Update to 0.8.2. * gnu/packages/bioinformatics.scm (python-pybedtools): Update to 0.8.2. [arguments]: Remove outdated substitutions; simplify check phase. --- gnu/packages/bioinformatics.scm | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index de76e67f57..df317df411 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -849,13 +849,13 @@ intended to behave exactly the same as the original BWK awk.") (define-public python-pybedtools (package (name "python-pybedtools") - (version "0.8.1") + (version "0.8.2") (source (origin (method url-fetch) (uri (pypi-uri "pybedtools" version)) (sha256 (base32 - "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0")))) + "0wc7z8g8prgdx7n5chjva2fdq03wiwhqisjjxzkjg1j5k5ha7151")))) (build-system python-build-system) (arguments `(#:modules ((ice-9 ftw) @@ -868,13 +868,6 @@ intended to behave exactly the same as the original BWK awk.") (modify-phases %standard-phases (add-after 'unpack 'disable-broken-tests (lambda _ - (substitute* "pybedtools/test/test_scripts.py" - ;; This test freezes. - (("def test_intron_exon_reads") - "def _do_not_test_intron_exon_reads") - ;; This test fails in the Python 2 build. - (("def test_venn_mpl") - "def _do_not_test_venn_mpl")) (substitute* "pybedtools/test/test_helpers.py" ;; Requires internet access. (("def test_chromsizes") @@ -886,8 +879,7 @@ intended to behave exactly the same as the original BWK awk.") ;; This issue still occurs on python2 (substitute* "pybedtools/test/test_issues.py" (("def test_issue_303") - "def _test_issue_303")) - #t)) + "def _test_issue_303")))) ;; TODO: Remove phase after it's part of PYTHON-BUILD-SYSTEM. ;; build system. ;; Force the Cythonization of C++ files to guard against compilation @@ -908,23 +900,13 @@ intended to behave exactly the same as the original BWK awk.") (invoke "python" "setup.py" "cythonize"))) (replace 'check (lambda _ - (let* ((cwd (getcwd)) - (build-root-directory (string-append cwd "/build/")) + (let* ((build-root-directory (string-append (getcwd) "/build/")) (build (string-append build-root-directory (find (cut string-prefix? "lib" <>) - (scandir (string-append - build-root-directory))))) - (scripts (string-append - build-root-directory - (find (cut string-prefix? "scripts" <>) - (scandir build-root-directory))))) + (scandir build-root-directory))))) (setenv "PYTHONPATH" - (string-append build ":" (getenv "PYTHONPATH"))) - ;; Executable scripts such as 'intron_exon_reads.py' must be - ;; available in the PATH. - (setenv "PATH" - (string-append scripts ":" (getenv "PATH")))) + (string-append build ":" (getenv "PYTHONPATH")))) ;; The tests need to be run from elsewhere... (mkdir-p "/tmp/test") (copy-recursively "pybedtools/test" "/tmp/test") -- cgit v1.2.3 From ce7b202f9b157c173cb9c2ab8d7a4b2d4a17496a Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Thu, 6 May 2021 18:55:45 +0200 Subject: gnu: Add perl-bio-db-hts. * gnu/packages/bioinformatics.scm (perl-bio-db-hts): New variable. --- gnu/packages/bioinformatics.scm | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index df317df411..60614613bd 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -1154,6 +1154,31 @@ converted to Alignment objects, and so on. This means that the objects provide a coordinated and extensible framework to do computational biology.") (license license:perl-license)))) +(define-public perl-bio-db-hts + (package + (name "perl-bio-db-hts") + (version "3.01") + (source + (origin + (method url-fetch) + (uri (string-append "mirror://cpan/authors/id/A/AV/AVULLO/Bio-DB-HTS-" + version ".tar.gz")) + (sha256 + (base32 + "0hjg0igfkpvh27zdkdr6pa7cqm9n6r7cwz0np74cl4wmawgvr9hj")))) + (build-system perl-build-system) + (native-inputs + `(("perl-module-build" ,perl-module-build) + ("pkg-config" ,pkg-config))) + (propagated-inputs + `(("bioperl-minimal" ,bioperl-minimal) + ("htslib" ,htslib-1.9))) + (home-page "https://metacpan.org/release/Bio-DB-HTS") + (synopsis "Perl interface to HTS library for DNA sequencing") + (description "This is a Perl interface to the HTS library for DNA +sequencing.") + (license license:asl2.0))) + (define-public python-biopython (package (name "python-biopython") -- cgit v1.2.3 From 9b99b6d9476c9191eb25bbb1eb1cec45ca79485d Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Fri, 7 May 2021 08:52:31 +0200 Subject: gnu: Add ensembl-vep. * gnu/packages/bioinformatics.scm (ensembl-vep): New variable. --- gnu/packages/bioinformatics.scm | 145 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) (limited to 'gnu/packages/bioinformatics.scm') diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 60614613bd..9c0e18591f 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -15316,6 +15316,151 @@ usually ignored by other methods or only used for filtering.") coordinates between different assemblies.") (license license:expat))) +(define-public ensembl-vep + (let* ((api-version "103") + (api-module + (lambda (name hash) + (origin (method git-fetch) + (uri (git-reference + (url (string-append "https://github.com/Ensembl/" + name ".git")) + (commit (string-append "release/" api-version)))) + (file-name (string-append name "-" api-version "-checkout")) + (sha256 (base32 hash)))))) + (package + (name "ensembl-vep") + (version (string-append api-version ".1")) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/Ensembl/ensembl-vep.git") + (commit (string-append "release/" version)))) + (sha256 + (base32 + "1iq7p72cv9b38jz2v8a4slzy2n8y0md487943180ym9xc8qvw09c")))) + (build-system gnu-build-system) + (arguments + `(#:modules ((guix build gnu-build-system) + (guix build utils) + (ice-9 match)) + #:phases + (modify-phases %standard-phases + (delete 'configure) + (delete 'build) + ;; Tests need to run after installation + (delete 'check) + (replace 'install + (lambda* (#:key inputs outputs #:allow-other-keys) + (let* ((modules '(("ensembl" "/") + ("ensembl-variation" "/Variation") + ("ensembl-funcgen" "/Funcgen") + ("ensembl-io" "/"))) + (scripts '(("convert_cache.pl" "vep_convert_cache.pl") + ("INSTALL.pl" "vep_install.pl") + ("haplo" #f) + ("variant_recoder" #f) + ("filter_vep" #f) + ("vep" #f))) + (out (assoc-ref outputs "out")) + (bin (string-append out "/bin")) + (perl (string-append out "/lib/perl5/site_perl"))) + (for-each + (match-lambda + ((name path) + (let ((dir (string-append perl "/Bio/EnsEMBL" path))) + (mkdir-p dir) + (copy-recursively + (string-append (assoc-ref inputs (string-append "api-module-" name)) + "/modules/Bio/EnsEMBL" path) + dir)))) + modules) + (copy-recursively "modules/" perl) + (mkdir-p bin) + (for-each + (match-lambda + ((script new-name) + (let ((location (string-append bin "/" + (or new-name (basename script))))) + (copy-file script location) + (chmod location #o555) + (wrap-program location + `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB") + ,perl)))))) + scripts) + + ;; Fix path to tools + (with-directory-excursion (string-append perl "/Bio/EnsEMBL") + (substitute* '("Funcgen/RunnableDB/ProbeMapping/PrePipelineChecks.pm" + "VEP/BaseRunner.pm" + "VEP/Utils.pm" + "VEP/AnnotationSource/Cache/VariationTabix.pm" + "VEP/AnnotationSource/Cache/BaseSerialized.pm" + "Variation/Utils/BaseVepTabixPlugin.pm" + "Variation/Utils/VEP.pm" + "Variation/Pipeline/ReleaseDataDumps/PreRunChecks.pm") + (("`which") + (string-append "`" + (assoc-ref inputs "which") + "/bin/which"))))))) + (add-after 'install 'check + (lambda* (#:key tests? inputs outputs #:allow-other-keys) + (when tests? + (setenv "PERL5LIB" + (string-append (getenv "PERL5LIB") + ":" + (assoc-ref outputs "out") + "/lib/perl5/site_perl")) + (copy-recursively (string-append (assoc-ref inputs "source") "/t") + "/tmp/t") + (for-each make-file-writable (find-files "/tmp/t")) + ;; TODO: haplo needs Set/IntervalTree.pm + (invoke "perl" "-e" (string-append " +use Test::Harness; use Test::Exception; +my $dirname = \"/tmp\"; +opendir TEST, \"$dirname\\/t\"; +my @test_files = map {\"$dirname\\/t\\/\".$_} grep {!/^\\./ && /\\.t$/} readdir TEST; closedir TEST; +@test_files = grep {!/Haplo/} @test_files; +runtests(@test_files); +")))))))) + (inputs + `(("bioperl-minimal" ,bioperl-minimal) + ("perl-bio-db-hts" ,perl-bio-db-hts) + ("perl-dbi" ,perl-dbi) + ("perl-dbd-mysql" ,perl-dbd-mysql) + ("perl-libwww" ,perl-libwww) + ("perl-http-tiny" ,perl-http-tiny) + ("perl-json" ,perl-json) + ("which" ,which))) + (propagated-inputs + `(("kentutils" ,kentutils))) + (native-inputs + `(("unzip" ,unzip) + ("perl" ,perl) + ("api-module-ensembl" + ,(api-module "ensembl" + "0s59rj905g72hljzfpvnx5nxwz925b917y4jp912i23f5gwxh14v")) + ("api-module-ensembl-variation" + ,(api-module "ensembl-variation" + "1dvwdzzfjhzymq02b6n4p6j3a9q4jgq0g89hs7hj1apd7zhirgkq")) + ("api-module-ensembl-funcgen" + ,(api-module "ensembl-funcgen" + "1x23pv38dmv0w0gby6rv3wds50qghb4v3v1mf43vk55msfxzry8n")) + ("api-module-ensembl-io" + ,(api-module "ensembl-io" + "14adb2x934lzsq20035mazdkhrkcw0qzb0xhz6zps9vk4wixwaix")) + ("perl-test-harness" ,perl-test-harness) + ("perl-test-exception" ,perl-test-exception))) + (home-page "http://www.ensembl.org/vep") + (synopsis "Predict functional effects of genomic variants") + (description + "This package provides a Variant Effect Predictor, which predicts +the functional effects of genomic variants. It also provides +Haplosaurus, which uses phased genotype data to predict +whole-transcript haplotype sequences, and Variant Recoder, which +translates between different variant encodings.") + (license license:asl2.0)))) + (define-public r-signac (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c") (revision "1")) -- cgit v1.2.3