summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm543
1 files changed, 477 insertions, 66 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 313c70784a..0d09a98dc5 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -3,7 +3,7 @@
;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
-;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
+;;; Copyright © 2016, 2020, 2021 Roel Janssen <roel@gnu.org>
;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
@@ -130,6 +130,7 @@
#:use-module (gnu packages qt)
#:use-module (gnu packages rdf)
#:use-module (gnu packages readline)
+ #:use-module (gnu packages rsync)
#:use-module (gnu packages ruby)
#:use-module (gnu packages serialization)
#:use-module (gnu packages shells)
@@ -144,6 +145,7 @@
#:use-module (gnu packages tls)
#:use-module (gnu packages vim)
#:use-module (gnu packages web)
+ #:use-module (gnu packages wget)
#:use-module (gnu packages xml)
#:use-module (gnu packages xorg)
#:use-module (srfi srfi-1)
@@ -311,7 +313,7 @@ BAM files.")
(define-public bcftools
(package
(name "bcftools")
- (version "1.11")
+ (version "1.12")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/samtools/bcftools/"
@@ -319,11 +321,11 @@ BAM files.")
version "/bcftools-" version ".tar.bz2"))
(sha256
(base32
- "0r508mp15pqzf8r1269kb4v5naw9zsvbwd3cz8s1yj7carsf9viw"))
+ "1x94l1hy2pi3lbz0sxlbw0g6q5z5apcrhrlcwda94ns9n4r6a3ks"))
(modules '((guix build utils)))
(snippet '(begin
;; Delete bundled htslib.
- (delete-file-recursively "htslib-1.11")
+ (delete-file-recursively "htslib-1.12")
#t))))
(build-system gnu-build-system)
(arguments
@@ -437,7 +439,7 @@ computational cluster.")
(define-public bedtools
(package
(name "bedtools")
- (version "2.29.2")
+ (version "2.30.0")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/arq5x/bedtools2/releases/"
@@ -445,7 +447,7 @@ computational cluster.")
"bedtools-" version ".tar.gz"))
(sha256
(base32
- "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
+ "1f2hh79l7dn147c2xyfgf5wfjvlqfw32kjfnnh2n1qy6rpzx2fik"))))
(build-system gnu-build-system)
(arguments
'(#:test-target "test"
@@ -457,7 +459,7 @@ computational cluster.")
(native-inputs
`(("python" ,python-wrapper)))
(inputs
- `(("samtools" ,samtools-1.9)
+ `(("samtools" ,samtools)
("zlib" ,zlib)))
(home-page "https://github.com/arq5x/bedtools2")
(synopsis "Tools for genome analysis and arithmetic")
@@ -553,6 +555,40 @@ input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
(license license:bsd-3)))
+(define-public pbgzip ; package definition for pbgzip, taken from a pinned git commit
+ (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974")) ; upstream commit that is fetched below
+ (package
+ (name "pbgzip")
+ (version (git-version "0.0.0" "0" commit)) ; version is derived from "0.0.0", revision "0" and the commit
+ (source (origin
+ (method git-fetch) ; the source is a git checkout, not a release tarball
+ (uri (git-reference
+ (url "https://github.com/nh13/pbgzip")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd"))))
+ (build-system gnu-build-system)
+ (native-inputs ; NOTE(review): presumably needed to generate the configure script from the checkout -- confirm
+ `(("autoconf" ,autoconf)
+ ("automake" ,automake)))
+ (inputs
+ `(("zlib" ,zlib)))
+ (home-page "https://github.com/nh13/pbgzip")
+ (synopsis "Parallel Block GZIP")
+ (description "This package implements parallel block gzip. For many
+formats, in particular genomics data formats, data are compressed in
+fixed-length blocks such that they can be easily indexed based on a (genomic)
+coordinate order, since typically each block is sorted according to this order.
+This allows for each block to be individually compressed (deflated), or more
+importantly, decompressed (inflated), with the latter enabling random retrieval
+of data in large files (gigabytes to terabytes). @code{pbgzip} is not limited
+to any particular format, but certain features are tailored to genomics data
+formats when enabled. Parallel decompression is somewhat faster, but the true
+speedup comes during compression.")
+ (license license:expat))))
+
(define-public blasr-libcpp
(package
(name "blasr-libcpp")
@@ -797,13 +833,13 @@ intended to behave exactly the same as the original BWK awk.")
(define-public python-pybedtools
(package
(name "python-pybedtools")
- (version "0.8.1")
+ (version "0.8.2")
(source (origin
(method url-fetch)
(uri (pypi-uri "pybedtools" version))
(sha256
(base32
- "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
+ "0wc7z8g8prgdx7n5chjva2fdq03wiwhqisjjxzkjg1j5k5ha7151"))))
(build-system python-build-system)
(arguments
`(#:modules ((srfi srfi-26)
@@ -814,13 +850,6 @@ intended to behave exactly the same as the original BWK awk.")
(modify-phases %standard-phases
(add-after 'unpack 'disable-broken-tests
(lambda _
- (substitute* "pybedtools/test/test_scripts.py"
- ;; This test freezes.
- (("def test_intron_exon_reads")
- "def _do_not_test_intron_exon_reads")
- ;; This test fails in the Python 2 build.
- (("def test_venn_mpl")
- "def _do_not_test_venn_mpl"))
(substitute* "pybedtools/test/test_helpers.py"
;; Requires internet access.
(("def test_chromsizes")
@@ -1097,6 +1126,31 @@ converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
(license license:perl-license))))
+(define-public perl-bio-db-hts ; package definition for the Bio-DB-HTS Perl distribution from CPAN
+ (package
+ (name "perl-bio-db-hts")
+ (version "3.01")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append "mirror://cpan/authors/id/A/AV/AVULLO/Bio-DB-HTS-" ; tarball name is completed with the version below
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "0hjg0igfkpvh27zdkdr6pa7cqm9n6r7cwz0np74cl4wmawgvr9hj"))))
+ (build-system perl-build-system)
+ (native-inputs
+ `(("perl-module-build" ,perl-module-build)
+ ("pkg-config" ,pkg-config)))
+ (propagated-inputs
+ `(("bioperl-minimal" ,bioperl-minimal)
+ ("htslib" ,htslib-1.9))) ; uses the htslib-1.9 variable, not the default htslib; NOTE(review): presumably a compatibility pin -- confirm
+ (home-page "https://metacpan.org/release/Bio-DB-HTS")
+ (synopsis "Perl interface to HTS library for DNA sequencing")
+ (description "This is a Perl interface to the HTS library for DNA
+sequencing.")
+ (license license:asl2.0)))
+
(define-public python-biopython
(package
(name "python-biopython")
@@ -1324,7 +1378,7 @@ package provides command line tools using the Bio++ library.")
(define-public blast+
(package
(name "blast+")
- (version "2.10.1")
+ (version "2.11.0")
(source (origin
(method url-fetch)
(uri (string-append
@@ -1332,7 +1386,7 @@ package provides command line tools using the Bio++ library.")
version "/ncbi-blast-" version "+-src.tar.gz"))
(sha256
(base32
- "11kvrrl0mcwww6530r55hccpg3x3msmhr3051fwnjbq8rzg2j1qi"))
+ "0m0r9vkw631ky1za1wilsfk9k9spwqh22nkrb9a57rbwmrc1i3nq"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -4421,7 +4475,7 @@ performance.")
(define-public htslib
(package
(name "htslib")
- (version "1.11")
+ (version "1.12")
(source (origin
(method url-fetch)
(uri (string-append
@@ -4429,7 +4483,7 @@ performance.")
version "/htslib-" version ".tar.bz2"))
(sha256
(base32
- "1mrq4mihzx37yqhj3sfz6da6mw49niia808bzsw2gkkgmadxvyng"))))
+ "1jplnvizgr0fyyvvmkfmnsywrrpqhid3760vw15bllz98qdi9012"))))
(build-system gnu-build-system)
;; Let htslib translate "gs://" and "s3://" to regular https links with
;; "--enable-gcs" and "--enable-s3". For these options to work, we also
@@ -5846,7 +5900,7 @@ to the user's query of interest.")
(define-public samtools
(package
(name "samtools")
- (version "1.11")
+ (version "1.12")
(source
(origin
(method url-fetch)
@@ -5855,11 +5909,11 @@ to the user's query of interest.")
version "/samtools-" version ".tar.bz2"))
(sha256
(base32
- "1dp5wknak4arnw5ghhif9mmljlfnw5bgm91wib7z0j8wdjywx0z2"))
+ "1jrdj2idpma5ja9cg0rr73b565vdbr9wyy6zig54bidicc2pg8vd"))
(modules '((guix build utils)))
(snippet '(begin
;; Delete bundled htslib.
- (delete-file-recursively "htslib-1.11")
+ (delete-file-recursively "htslib-1.12")
#t))))
(build-system gnu-build-system)
(arguments
@@ -7175,6 +7229,43 @@ clustering analysis, differential analysis, motif inference and exploration of
single cell ATAC-seq sequencing data.")
(license license:gpl3)))
+(define-public r-shinycell ; package definition for the ShinyCell R package, taken from a pinned git commit
+ (let ((commit
+ "aecbd56e66802f28e397f5ae1f19403aadd12163") ; upstream commit that is fetched below
+ (revision "1")) ; revision component passed to git-version
+ (package
+ (name "r-shinycell")
+ (version (git-version "2.0.0" revision commit)) ; version is derived from "2.0.0", the revision and the commit
+ (source
+ (origin
+ (method git-fetch) ; the source is a git checkout, not a release tarball
+ (uri (git-reference
+ (url "https://github.com/SGDDNB/ShinyCell")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "13jn2ikmvljnzayk485g1mmq5abcp9m1b8n1djdb1agmn83zaki5"))))
+ (properties `((upstream-name . "ShinyCell"))) ; records the upstream spelling of the package name
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-data-table" ,r-data-table)
+ ("r-ggplot2" ,r-ggplot2)
+ ("r-glue" ,r-glue)
+ ("r-gridextra" ,r-gridextra)
+ ("r-hdf5r" ,r-hdf5r)
+ ("r-matrix" ,r-matrix)
+ ("r-r-utils" ,r-r-utils)
+ ("r-rcolorbrewer" ,r-rcolorbrewer)
+ ("r-readr" ,r-readr)
+ ("r-reticulate" ,r-reticulate)))
+ (home-page "https://github.com/SGDDNB/ShinyCell")
+ (synopsis "Shiny interactive web apps for single-cell data")
+ (description
+ "This package provides Shiny apps for interactive exploration of
+single-cell data.")
+ (license license:gpl3))))
+
(define-public r-archr
(let ((commit "46b519ffb6f73edf132497ac31650d19ef055dc1")
(revision "1"))
@@ -7489,6 +7580,64 @@ Perl and can be helpful if you want to filter, reformat, or trim your sequence
data. It also generates basic statistics for your sequences.")
(license license:gpl3+)))
+(define-public shorah ; package definition for ShoRAH, built from the upstream release tarball
+ (package
+ (name "shorah")
+ (version "1.99.2")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (string-append "https://github.com/cbg-ethz/shorah"
+ "/releases/download/v" version
+ "/shorah-" version ".tar.xz"))
+ (sha256
+ (base32
+ "158dir9qcqspknlnyfr9zwk41x48nrh5wcg10k2grh9cidp9daiq"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'fix-test-wrapper ; make the end-to-end test call the installed script in bin
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+ (substitute* "examples/run_end2end_test"
+ (("\\$\\{interpreter\\} ../\\$\\{testscript\\}")
+ (string-append bin "/${testscript}"))))))
+ (delete 'check) ; the check phase is re-added below, after 'wrap-programs
+ (add-after 'install 'wrap-programs ; wrap bin/shorah so that it sees the needed PYTHONPATH entries
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
+ (site (string-append ; this package's own site-packages directory
+ out "/lib/python"
+ ,(version-major+minor
+ (package-version python))
+ "/site-packages"))
+ (pythonpath (getenv "PYTHONPATH")) ; PYTHONPATH as set in the build environment
+ (script (string-append out "/bin/shorah")))
+ (chmod script #o555) ; read and execute only
+ (wrap-program script `("PYTHONPATH" ":" prefix (,site ,pythonpath))))))
+ (add-after 'wrap-programs 'check ; tests run last, against the installed and wrapped program
+ (lambda* (#:key tests? #:allow-other-keys)
+ (when tests?
+ (invoke "make" "check")))))))
+ (inputs
+ `(("boost" ,boost)
+ ("htslib" ,htslib)
+ ("python" ,python)
+ ("python-biopython" ,python-biopython)
+ ("python-numpy" ,python-numpy)
+ ("zlib" ,zlib)))
+ (native-inputs
+ `(("pkg-config" ,pkg-config)))
+ (home-page "https://github.com/cbg-ethz/shorah") ; same upstream repository as the source URI above
+ (synopsis "Short reads assembly into haplotypes")
+ (description
+ "ShoRAH is a project for the analysis of next generation sequencing data.
+It is designed to analyse genetically heterogeneous samples. Its tools
+provide error correction, haplotype reconstruction and estimation of the
+frequency of the different genetic variants present in a mixed sample.")
+ (license license:gpl3+)))
+
(define-public ruby-bio-kseq
(package
(name "ruby-bio-kseq")
@@ -7633,40 +7782,6 @@ including VCF header and contents in RDF and JSON.")
(home-page "https://github.com/vcflib/bio-vcf")
(license license:expat)))
-(define-public r-summarizedexperiment
- (package
- (name "r-summarizedexperiment")
- (version "1.20.0")
- (source (origin
- (method url-fetch)
- (uri (bioconductor-uri "SummarizedExperiment" version))
- (sha256
- (base32
- "04x6d4mcsnvz6glkmf6k2cv3fs8zk03i9rvv0ahpl793n8l411ps"))))
- (properties
- `((upstream-name . "SummarizedExperiment")))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-biobase" ,r-biobase)
- ("r-biocgenerics" ,r-biocgenerics)
- ("r-delayedarray" ,r-delayedarray)
- ("r-genomeinfodb" ,r-genomeinfodb)
- ("r-genomicranges" ,r-genomicranges)
- ("r-iranges" ,r-iranges)
- ("r-matrix" ,r-matrix)
- ("r-matrixgenerics" ,r-matrixgenerics)
- ("r-s4vectors" ,r-s4vectors)))
- (native-inputs
- `(("r-knitr" ,r-knitr)))
- (home-page "https://bioconductor.org/packages/SummarizedExperiment")
- (synopsis "Container for representing genomic ranges by sample")
- (description
- "The SummarizedExperiment container contains one or more assays, each
-represented by a matrix-like object of numeric or other mode. The rows
-typically represent genomic ranges of interest and the columns represent
-samples.")
- (license license:artistic2.0)))
-
(define-public r-genomicalignments
(package
(name "r-genomicalignments")
@@ -9461,14 +9576,14 @@ Shiny-based display methods for Bioconductor objects.")
(define-public r-annotationhub
(package
(name "r-annotationhub")
- (version "2.22.0")
+ (version "2.22.1")
(source
(origin
(method url-fetch)
(uri (bioconductor-uri "AnnotationHub" version))
(sha256
(base32
- "1950x654ffqx53b154kbph808zdh2xm5vmj9vzmc5nxc28fi2z5g"))))
+ "08d7m0n4jkpajsj0bvi5xd4vi1zqczl6lnrh8kqi2fbjkrvwdqp5"))))
(properties `((upstream-name . "AnnotationHub")))
(build-system r-build-system)
(propagated-inputs
@@ -9558,14 +9673,14 @@ microarrays or GRanges for sequencing data.")
(define-public r-gage
(package
(name "r-gage")
- (version "2.40.1")
+ (version "2.40.2")
(source
(origin
(method url-fetch)
(uri (bioconductor-uri "gage" version))
(sha256
(base32
- "1iawa03dy4bl333my69d4sk7d74cjzfg5dpcxga6q5dglan4sp8r"))))
+ "1bs0hx8sqiyl08dqn2zx31kbv5aci4xvrs71pplx2yxal3jf5178"))))
(build-system r-build-system)
(propagated-inputs
`(("r-annotationdbi" ,r-annotationdbi)
@@ -9691,14 +9806,14 @@ originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
(define-public r-ensembldb
(package
(name "r-ensembldb")
- (version "2.14.0")
+ (version "2.14.1")
(source
(origin
(method url-fetch)
(uri (bioconductor-uri "ensembldb" version))
(sha256
(base32
- "04il99gcrqzakvc0bxchdp9gghkn1sp9lpiian0iz4y7r67z3wpy"))))
+ "1hxwfh19qafpdhzprvw4nr8ks3gz7f0y8gyfhk8yqmmvvnvgqv40"))))
(build-system r-build-system)
(propagated-inputs
`(("r-annotationdbi" ,r-annotationdbi)
@@ -11775,7 +11890,7 @@ in an easily configurable manner.")
(define-public pigx-bsseq
(package
(name "pigx-bsseq")
- (version "0.1.2")
+ (version "0.1.3")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
@@ -11783,7 +11898,7 @@ in an easily configurable manner.")
"/pigx_bsseq-" version ".tar.gz"))
(sha256
(base32
- "0mpzlay2d5cjpmrcp7knff6rg1c2mqszd638n7lw0mc0cycbp9f8"))))
+ "0blm0bl5z3ng01n7hh2ffk4rkzvf7vb3nm0crgdzrxr5cahxdxql"))))
(build-system gnu-build-system)
(arguments
`(;; TODO: tests currently require 12+GB of RAM. See
@@ -11809,8 +11924,11 @@ in an easily configurable manner.")
("r-annotationhub" ,r-annotationhub)
("r-dt" ,r-dt)
("r-genomation" ,r-genomation)
+ ("r-ggbio" ,r-ggbio)
("r-ggrepel" ,r-ggrepel)
+ ("r-matrixstats" ,r-matrixstats)
("r-methylkit" ,r-methylkit)
+ ("r-reshape2" ,r-reshape2)
("r-rtracklayer" ,r-rtracklayer)
("r-rmarkdown" ,r-rmarkdown)
("r-bookdown" ,r-bookdown)
@@ -12408,7 +12526,7 @@ in RNA-seq data.")
("python-igraph" ,python-igraph)
("python-joblib" ,python-joblib)
("python-legacy-api-wrap" ,python-legacy-api-wrap)
- ("python-louvain" ,python-louvain)
+ ("python-louvain" ,python-louvain-0.6)
("python-matplotlib" ,python-matplotlib)
("python-natsort" ,python-natsort)
("python-networkx" ,python-networkx)
@@ -14979,6 +15097,299 @@ signaling, and more. It continues to be evolved and expanded by an
international community.")
(license license:lgpl2.1+)))
+(define-public kraken2 ; package definition for Kraken 2, built from the upstream git tag
+ (package
+ (name "kraken2")
+ (version "2.1.1")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/DerrickWood/kraken2")
+ (commit (string-append "v" version)))) ; the tag is the version prefixed with "v"
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0h7a7vygd7y5isbrnc6srwq6xj1rmyd33pm8mmcgfkmlxlg5vkg3"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:tests? #false ; there are none
+ #:make-flags (list "-C" "src" ; make is run inside the src directory
+ (string-append "KRAKEN2_DIR="
+ (assoc-ref %outputs "out") "/bin"))
+ #:phases
+ (modify-phases %standard-phases
+ (delete 'configure) ; the configure phase is not run
+ (add-before 'install 'install-scripts ; patch the helper scripts and copy them to bin before 'install
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
+ (replacements `(("KRAKEN2_DIR" . ,bin) ; values for the #####=KEY=##### placeholders
+ ("VERSION" . ,,version))))
+ (mkdir-p bin)
+
+ (with-directory-excursion "scripts"
+ (let ((scripts (find-files "." ".*"))) ; every file found under scripts/
+ (substitute* scripts ; fill in each placeholder; an unknown key is an error
+ (("#####=([^=]+)=#####" _ key)
+ (or (assoc-ref replacements key)
+ (error (format #false "unknown key: ~a~%" key)))))
+ (substitute* "kraken2" ; use absolute file names for the compression programs
+ (("compression_program = \"bzip2\"")
+ (string-append "compression_program = \""
+ (which "bzip2")
+ "\""))
+ (("compression_program = \"gzip\"")
+ (string-append "compression_program = \""
+ (which "gzip")
+ "\"")))
+ (substitute* '("download_genomic_library.sh" ; replace "wget" by its absolute file name
+ "download_taxonomy.sh"
+ "16S_gg_installation.sh"
+ "16S_silva_installation.sh"
+ "16S_rdp_installation.sh")
+ (("wget") (which "wget")))
+ (substitute* '("download_taxonomy.sh" ; replace "rsync -" invocations likewise
+ "download_genomic_library.sh"
+ "rsync_from_ncbi.pl")
+ (("rsync -")
+ (string-append (which "rsync") " -")))
+ (substitute* "mask_low_complexity.sh" ; replace "which" likewise
+ (("which") (which "which")))
+ (substitute* '("mask_low_complexity.sh" ; replace "sed -e " invocations likewise
+ "download_genomic_library.sh"
+ "16S_silva_installation.sh")
+ (("sed -e ")
+ (string-append (which "sed") " -e ")))
+ (substitute* '("rsync_from_ncbi.pl" ; replace "gunzip" likewise
+ "16S_rdp_installation.sh"
+ "16S_silva_installation.sh"
+ "16S_gg_installation.sh"
+ "download_taxonomy.sh"
+ "download_genomic_library.sh")
+ (("gunzip") (which "gunzip")))
+ (for-each (lambda (script) ; set mode 555 on each script, then copy it to bin
+ (chmod script #o555)
+ (install-file script bin))
+ scripts)))))))))
+ (inputs ; NOTE(review): bzip2 is looked up with `which' above but is not listed here -- confirm it is provided implicitly
+ `(("gzip" ,gzip)
+ ("perl" ,perl)
+ ("rsync" ,rsync)
+ ("sed" ,sed)
+ ("wget" ,wget)
+ ("which" ,which)))
+ (home-page "https://github.com/DerrickWood/kraken2")
+ (synopsis "Taxonomic sequence classification system")
+ (description "Kraken is a taxonomic sequence classifier that assigns
+taxonomic labels to DNA sequences. Kraken examines the k-mers within a query
+sequence and uses the information within those k-mers to query a
+database. That database maps k-mers to the lowest common ancestor (LCA) of all
+genomes known to contain a given k-mer.")
+ (license license:expat)))
+
+(define-public lofreq ; package definition for LoFreq, built from the upstream git tag
+ (package
+ (name "lofreq")
+ (version "2.1.5")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/CSB5/lofreq")
+ (commit (string-append "v" version)))) ; the tag is the version prefixed with "v"
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0qssrn3mgjak7df6iqc1rljqd3g3a5syvg0lsv4vds43s3fq23bl"))))
+ (build-system gnu-build-system)
+ (arguments
+ '(#:test-target "bug-tests" ; make target to use if the tests are ever enabled
+ #:tests? #false)) ; test data are not included
+ (inputs
+ `(("htslib" ,htslib)
+ ("python" ,python-wrapper)
+ ("zlib" ,zlib)))
+ (native-inputs
+ `(("autoconf" ,autoconf)
+ ("automake" ,automake)
+ ("which" ,which)))
+ (home-page "https://csb5.github.io/lofreq/")
+ (synopsis "Sensitive variant calling from sequencing data") ; no trailing whitespace in the synopsis
+ (description "LoFreq is a fast and sensitive variant-caller for inferring
+SNVs and indels from next-generation sequencing data. It makes full use of
+base-call qualities and other sources of errors inherent in
+sequencing (e.g. mapping or base/indel alignment uncertainty), which are
+usually ignored by other methods or only used for filtering.")
+ (license license:expat)))
+
+(define-public python-pyliftover ; package definition for PyLiftover, built from the upstream git tag
+ (package
+ (name "python-pyliftover")
+ (version "0.4")
+ ;; The PyPI release does not include test data, so the source is fetched from git.
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/konstantint/pyliftover")
+ (commit version))) ; the tag is the bare version string
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1j8jp9iynv2l3jv5pr0pn0p3azlama1bqg233piglzm6bqh3m2m3"))))
+ (build-system python-build-system)
+ (arguments `(#:tests? #false)) ; the tests access the web
+ (native-inputs ; NOTE(review): tests are disabled above, so python-pytest may be unnecessary -- confirm
+ `(("python-pytest" ,python-pytest)))
+ (home-page "https://github.com/konstantint/pyliftover")
+ (synopsis "Python implementation of UCSC liftOver genome coordinate conversion")
+ (description
+ "PyLiftover is a library for quick and easy conversion of genomic (point)
+coordinates between different assemblies.")
+ (license license:expat)))
+
+(define-public ensembl-vep ; Ensembl VEP together with the Ensembl API modules of release api-version
+ (let* ((api-version "103")
+ (api-module ; build an origin for the Ensembl git repository NAME at release/api-version, checked against HASH
+ (lambda (name hash)
+ (origin (method git-fetch)
+ (uri (git-reference
+ (url (string-append "https://github.com/Ensembl/"
+ name ".git"))
+ (commit (string-append "release/" api-version))))
+ (file-name (string-append name "-" api-version "-checkout"))
+ (sha256 (base32 hash))))))
+ (package
+ (name "ensembl-vep")
+ (version (string-append api-version ".1"))
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/Ensembl/ensembl-vep.git")
+ (commit (string-append "release/" version))))
+ (file-name (git-file-name name version)) ; name the checkout after the package name and version
+ (sha256
+ (base32 "1iq7p72cv9b38jz2v8a4slzy2n8y0md487943180ym9xc8qvw09c"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:modules ((guix build gnu-build-system)
+ (guix build utils)
+ (ice-9 match))
+ #:phases
+ (modify-phases %standard-phases
+ (delete 'configure)
+ (delete 'build)
+ ;; Tests need to run after installation
+ (delete 'check)
+ (replace 'install
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let* ((modules '(("ensembl" "/") ; (repository-name sub-path-under-Bio/EnsEMBL) pairs
+ ("ensembl-variation" "/Variation")
+ ("ensembl-funcgen" "/Funcgen")
+ ("ensembl-io" "/")))
+ (scripts '(("convert_cache.pl" "vep_convert_cache.pl") ; (script installed-name) pairs; #f keeps the script's own name
+ ("INSTALL.pl" "vep_install.pl")
+ ("haplo" #f)
+ ("variant_recoder" #f)
+ ("filter_vep" #f)
+ ("vep" #f)))
+ (out (assoc-ref outputs "out"))
+ (bin (string-append out "/bin"))
+ (perl (string-append out "/lib/perl5/site_perl")))
+ (for-each ; copy each API module's Bio/EnsEMBL sub-tree into the output
+ (match-lambda
+ ((name path)
+ (let ((dir (string-append perl "/Bio/EnsEMBL" path)))
+ (mkdir-p dir)
+ (copy-recursively
+ (string-append (assoc-ref inputs (string-append "api-module-" name))
+ "/modules/Bio/EnsEMBL" path)
+ dir))))
+ modules)
+ (copy-recursively "modules/" perl) ; the modules shipped in the VEP source tree itself
+ (mkdir-p bin)
+ (for-each ; install each script, set mode 555 and wrap it with PERL5LIB
+ (match-lambda
+ ((script new-name)
+ (let ((location (string-append bin "/"
+ (or new-name (basename script)))))
+ (copy-file script location)
+ (chmod location #o555)
+ (wrap-program location
+ `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
+ ,perl))))))
+ scripts)
+
+ ;; Fix path to tools
+ (with-directory-excursion (string-append perl "/Bio/EnsEMBL")
+ (substitute* '("Funcgen/RunnableDB/ProbeMapping/PrePipelineChecks.pm"
+ "VEP/BaseRunner.pm"
+ "VEP/Utils.pm"
+ "VEP/AnnotationSource/Cache/VariationTabix.pm"
+ "VEP/AnnotationSource/Cache/BaseSerialized.pm"
+ "Variation/Utils/BaseVepTabixPlugin.pm"
+ "Variation/Utils/VEP.pm"
+ "Variation/Pipeline/ReleaseDataDumps/PreRunChecks.pm")
+ (("`which") ; backtick calls to which get the absolute file name of the "which" input
+ (string-append "`"
+ (assoc-ref inputs "which")
+ "/bin/which")))))))
+ (add-after 'install 'check ; the check phase is re-added here so that it runs after 'install
+ (lambda* (#:key tests? inputs outputs #:allow-other-keys)
+ (when tests?
+ (setenv "PERL5LIB" ; make the freshly installed modules visible to the tests
+ (string-append (getenv "PERL5LIB")
+ ":"
+ (assoc-ref outputs "out")
+ "/lib/perl5/site_perl"))
+ (copy-recursively (string-append (assoc-ref inputs "source") "/t") ; tests run from a writable copy of the source's t directory
+ "/tmp/t")
+ (for-each make-file-writable (find-files "/tmp/t"))
+ ;; TODO: haplo needs Set/IntervalTree.pm
+ (invoke "perl" "-e" (string-append "
+use Test::Harness; use Test::Exception;
+my $dirname = \"/tmp\";
+opendir TEST, \"$dirname\\/t\";
+my @test_files = map {\"$dirname\\/t\\/\".$_} grep {!/^\\./ && /\\.t$/} readdir TEST; closedir TEST;
+@test_files = grep {!/Haplo/} @test_files;
+runtests(@test_files);
+"))))))))
+ (inputs
+ `(("bioperl-minimal" ,bioperl-minimal)
+ ("perl-bio-db-hts" ,perl-bio-db-hts)
+ ("perl-dbi" ,perl-dbi)
+ ("perl-dbd-mysql" ,perl-dbd-mysql)
+ ("perl-libwww" ,perl-libwww)
+ ("perl-http-tiny" ,perl-http-tiny)
+ ("perl-json" ,perl-json)
+ ("which" ,which)))
+ (propagated-inputs
+ `(("kentutils" ,kentutils)))
+ (native-inputs
+ `(("unzip" ,unzip)
+ ("perl" ,perl)
+ ("api-module-ensembl" ; the "api-module-" labels are looked up by the install phase
+ ,(api-module "ensembl"
+ "0s59rj905g72hljzfpvnx5nxwz925b917y4jp912i23f5gwxh14v"))
+ ("api-module-ensembl-variation"
+ ,(api-module "ensembl-variation"
+ "1dvwdzzfjhzymq02b6n4p6j3a9q4jgq0g89hs7hj1apd7zhirgkq"))
+ ("api-module-ensembl-funcgen"
+ ,(api-module "ensembl-funcgen"
+ "1x23pv38dmv0w0gby6rv3wds50qghb4v3v1mf43vk55msfxzry8n"))
+ ("api-module-ensembl-io"
+ ,(api-module "ensembl-io"
+ "14adb2x934lzsq20035mazdkhrkcw0qzb0xhz6zps9vk4wixwaix"))
+ ("perl-test-harness" ,perl-test-harness)
+ ("perl-test-exception" ,perl-test-exception)))
+ (home-page "http://www.ensembl.org/vep")
+ (synopsis "Predict functional effects of genomic variants")
+ (description
+ "This package provides a Variant Effect Predictor, which predicts
+the functional effects of genomic variants. It also provides
+Haplosaurus, which uses phased genotype data to predict
+whole-transcript haplotype sequences, and Variant Recoder, which
+translates between different variant encodings.")
+ (license license:asl2.0))))
+
(define-public r-signac
(let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c")
(revision "1"))