summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm359
1 files changed, 302 insertions, 57 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 002b417b54..0b87d37bcf 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -388,10 +388,10 @@ transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
(license (list license:gpl3+ license:expat))))
-(define-public bcftools-1.9
+(define-public bcftools-1.10
(package (inherit bcftools)
(name "bcftools")
- (version "1.9")
+ (version "1.10")
(source (origin
(method url-fetch)
(uri (string-append "https://github.com/samtools/bcftools/"
@@ -399,15 +399,15 @@ transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
version "/bcftools-" version ".tar.bz2"))
(sha256
(base32
- "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
+ "10xgwfdgqb6dsmr3ndnpb77mc3a38dy8kh2c6czn6wj7jhdp4dra"))
(modules '((guix build utils)))
(snippet '(begin
;; Delete bundled htslib.
- (delete-file-recursively "htslib-1.9")
+ (delete-file-recursively "htslib-1.10")
#t))))
(build-system gnu-build-system)
(native-inputs
- `(("htslib" ,htslib-1.9)
+ `(("htslib" ,htslib-1.10)
("perl" ,perl)))))
(define-public bedops
@@ -1213,14 +1213,14 @@ sequencing.")
(define-public python-biopython
(package
(name "python-biopython")
- (version "1.70")
+ (version "1.73")
(source (origin
(method url-fetch)
;; use PyPi rather than biopython.org to ease updating
(uri (pypi-uri "biopython" version))
(sha256
(base32
- "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
+ "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))
(build-system python-build-system)
(arguments
`(#:phases
@@ -2043,7 +2043,7 @@ has several key features:
(define-public python-pysam
(package
(name "python-pysam")
- (version "0.15.1")
+ (version "0.16.0.1")
(source (origin
(method git-fetch)
;; Test data is missing on PyPi.
@@ -2053,7 +2053,7 @@ has several key features:
(file-name (git-file-name name version))
(sha256
(base32
- "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
+ "168bwwm8c2k22m7paip8q0yajyl7xdxgnik0bgjl7rhqg0majz0f"))
(modules '((guix build utils)))
(snippet '(begin
;; Drop bundled htslib. TODO: Also remove samtools
@@ -2073,36 +2073,37 @@ has several key features:
(setenv "LDFLAGS" "-lncurses")
(setenv "CFLAGS" "-D_CURSES_LIB=1")))
(replace 'check
- (lambda _
- ;; This file contains tests that require a connection to the
- ;; internet.
- (delete-file "tests/tabix_test.py")
+ (lambda* (#:key tests? #:allow-other-keys)
;; FIXME: These tests fail with "AttributeError: 'array.array'
;; object has no attribute 'tostring'".
(delete-file "tests/AlignmentFile_test.py")
- (delete-file "tests/AlignedSegment_test.py")
- ;; Step out of source dir so python does not import from CWD.
- (with-directory-excursion "tests"
- (setenv "HOME" "/tmp")
- (invoke "make" "-C" "pysam_data")
- (invoke "make" "-C" "cbcf_data")
- ;; Running nosetests without explicitly asking for a single
- ;; process leads to a crash. Running with multiple processes
- ;; fails because the tests are not designed to run in parallel.
-
- ;; FIXME: tests keep timing out on some systems.
- (invoke "nosetests" "-v" "--processes" "1")))))))
+ (when tests?
+ ;; Step out of source dir so python does not import from CWD.
+ (with-directory-excursion "tests"
+ (setenv "HOME" "/tmp")
+ (invoke "make" "-C" "pysam_data")
+ (invoke "make" "-C" "cbcf_data")
+ (invoke "pytest" "-k"
+ (string-append
+ ;; requires network access.
+ "not FileHTTP"
+ ;; bug in test suite with samtools update
+ ;; https://github.com/pysam-developers/pysam/issues/961
+ " and not TestHeaderBAM"
+ " and not TestHeaderCRAM"
+ " and not test_text_processing")))))))))
(propagated-inputs
- `(("htslib" ,htslib-1.9))) ; Included from installed header files.
+ `(("htslib" ,htslib-1.10))) ; Included from installed header files.
(inputs
`(("ncurses" ,ncurses)
("curl" ,curl)
("zlib" ,zlib)))
(native-inputs
`(("python-cython" ,python-cython)
+ ("python-pytest" ,python-pytest)
;; Dependencies below are are for tests only.
- ("samtools" ,samtools-1.9)
- ("bcftools" ,bcftools-1.9)
+ ("samtools" ,samtools-1.10)
+ ("bcftools" ,bcftools-1.10)
("python-nose" ,python-nose)))
(home-page "https://github.com/pysam-developers/pysam")
(synopsis "Python bindings to the SAMtools C API")
@@ -3589,7 +3590,7 @@ results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
("zlib" ,zlib)))
(native-inputs
`(("pkg-config" ,pkg-config)
- ("seqan" ,seqan)))
+ ("seqan" ,seqan-2)))
(home-page "https://github.com/seqan/flexbar")
(synopsis "Barcode and adapter removal tool for sequencing platforms")
(description
@@ -4603,6 +4604,19 @@ data. It also provides the @command{bgzip}, @command{htsfile}, and
;; the rest is released under the Expat license
(license (list license:expat license:bsd-3))))
+(define-public htslib-1.10
+ (package (inherit htslib)
+ (name "htslib")
+ (version "1.10")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append
+ "https://github.com/samtools/htslib/releases/download/"
+ version "/htslib-" version ".tar.bz2"))
+ (sha256
+ (base32
+ "0wm9ay7qgypj3mwx9zl1mrpnr36298b1aj5vx69l4k7bzbclvr3s"))))))
+
(define-public htslib-1.9
(package (inherit htslib)
(name "htslib")
@@ -6051,10 +6065,10 @@ variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
(license license:expat)))
-(define-public samtools-1.9
+(define-public samtools-1.10
(package (inherit samtools)
(name "samtools")
- (version "1.9")
+ (version "1.10")
(source
(origin
(method url-fetch)
@@ -6063,14 +6077,14 @@ viewer.")
version "/samtools-" version ".tar.bz2"))
(sha256
(base32
- "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
+ "119ms0dpydw8dkh3zc4yyw9zhdzgv12px4l2kayigv31bpqcb7kv"))
(modules '((guix build utils)))
(snippet '(begin
;; Delete bundled htslib.
- (delete-file-recursively "htslib-1.9")
+ (delete-file-recursively "htslib-1.10")
#t))))
(inputs
- `(("htslib" ,htslib-1.9)
+ `(("htslib" ,htslib-1.10)
("ncurses" ,ncurses)
("perl" ,perl)
("python" ,python)
@@ -6706,6 +6720,39 @@ writing files into the .sra format.")
(define-public seqan
(package
(name "seqan")
+ (version "3.0.3")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "https://github.com/seqan/seqan3/releases/"
+ "download/" version "/seqan3-"
+ version "-Source.tar.xz"))
+ (sha256
+ (base32
+ "1h2z0cvgidhkmh5xsbw75waqbrqbbv6kkrvb0b92xfh3gqpaiz22"))))
+ (build-system cmake-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (replace 'check
+ (lambda _
+ (invoke "ctest" "test" "--output-on-failure"))))))
+ (native-inputs
+ `(("bzip2" ,bzip2)
+ ("cereal" ,cereal)
+ ("zlib" ,zlib)))
+ (home-page "https://www.seqan.de")
+ (synopsis "Library for nucleotide sequence analysis")
+ (description
+ "SeqAn is a C++ library of efficient algorithms and data structures for
+the analysis of sequences with the focus on biological data. It contains
+algorithms and data structures for string representation and their
+manipulation, online and indexed string search, efficient I/O of
+bioinformatics file formats, sequence alignment, and more.")
+ (license license:bsd-3)))
+
+(define-public seqan-2
+ (package
+ (inherit seqan)
(version "2.4.0")
(source (origin
(method url-fetch)
@@ -6737,16 +6784,7 @@ writing files into the .sra format.")
(native-inputs
`(("source" ,source)
("tar" ,tar)
- ("xz" ,xz)))
- (home-page "https://www.seqan.de")
- (synopsis "Library for nucleotide sequence analysis")
- (description
- "SeqAn is a C++ library of efficient algorithms and data structures for
-the analysis of sequences with the focus on biological data. It contains
-algorithms and data structures for string representation and their
-manipulation, online and indexed string search, efficient I/O of
-bioinformatics file formats, sequence alignment, and more.")
- (license license:bsd-3)))
+ ("xz" ,xz)))))
(define-public seqan-1
(package (inherit seqan)
@@ -9310,6 +9348,54 @@ using nucleotide or amino-acid sequence data.")
;; GPLv3 only
(license license:gpl3)))
+(define-public segemehl
+ (package
+ (name "segemehl")
+ (version "0.3.4")
+ (source (origin
+ (method url-fetch)
+ (uri (string-append "https://www.bioinf.uni-leipzig.de/Software"
+ "/segemehl/downloads/segemehl-"
+ version ".tar.gz"))
+ (sha256
+ (base32
+ "0lbzbb7i8zadsn9b99plairhq6s2h1z8qdn6n7djclfis01nycz4"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:make-flags
+ (list (string-append "CC=" ,(cc-for-target))
+ "all")
+ #:tests? #false ; there are none
+ #:phases
+ (modify-phases %standard-phases
+ (delete 'configure)
+ ;; There is no installation target
+ (replace 'install
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (let* ((out (assoc-ref outputs "out"))
+ (bin (string-append out "/bin"))
+ (exes (list "segemehl.x" "haarz.x")))
+ (mkdir-p bin)
+ (for-each (lambda (exe)
+ (install-file exe bin))
+ exes)))))))
+ (inputs
+ `(("htslib" ,htslib)
+ ("ncurses" ,ncurses)
+ ("zlib" ,zlib)))
+ (native-inputs
+ `(("pkg-config" ,pkg-config)))
+ (home-page "https://www.bioinf.uni-leipzig.de/Software/segemehl")
+ (synopsis "Map short sequencer reads to reference genomes")
+ (description "Segemehl is software to map short sequencer reads to
+reference genomes. Segemehl implements a matching strategy based on enhanced
+suffix arrays (ESA). It accepts fasta and fastq queries (gzip'ed and
+bgzip'ed). In addition to the alignment of reads from standard DNA- and
+RNA-seq protocols, it also allows the mapping of bisulfite converted
+reads (Lister and Cokus) and implements a split read mapping strategy. The
+output of segemehl is a SAM or BAM formatted alignment file.")
+ (license license:gpl3+)))
+
(define-public kallisto
(package
(name "kallisto")
@@ -12991,28 +13077,32 @@ downstream analysis.")
("taxtastic" ,taxtastic)))
(synopsis "Pplacer Python scripts")))
-(define-public python2-checkm-genome
+(define-public checkm
(package
- (name "python2-checkm-genome")
- (version "1.0.13")
+ (name "checkm")
+ (version "1.1.3")
(source
(origin
(method url-fetch)
(uri (pypi-uri "checkm-genome" version))
(sha256
(base32
- "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
+ "0i2nnki639hgjag17wlva2x0ymn37b4krqsf6akxddykhfbkdnkz"))))
(build-system python-build-system)
(arguments
- `(#:python ,python-2
- #:tests? #f)) ; some tests are interactive
- (propagated-inputs
- `(("python-dendropy" ,python2-dendropy)
- ("python-matplotlib" ,python2-matplotlib)
- ("python-numpy" ,python2-numpy)
- ("python-pysam" ,python2-pysam)
- ("python-scipy" ,python2-scipy)))
- (home-page "https://pypi.org/project/Checkm/")
+ `(#:tests? #f ; Some tests fail for unknown reasons.
+ #:phases
+ (modify-phases %standard-phases
+ (add-before 'check 'set-HOME
+ (lambda _
+ (setenv "HOME" "/tmp"))))))
+ (inputs
+ `(("python-dendropy" ,python-dendropy)
+ ("python-matplotlib" ,python-matplotlib)
+ ("python-numpy" ,python-numpy)
+ ("python-pysam" ,python-pysam)
+ ("python-scipy" ,python-scipy)))
+ (home-page "https://ecogenomics.github.io/CheckM/")
(synopsis "Assess the quality of putative genome bins")
(description
"CheckM provides a set of tools for assessing the quality of genomes
@@ -13027,6 +13117,9 @@ on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
(license license:gpl3+)))
+(define-public python2-checkm-genome
+ (deprecated-package "python2-checkm-genome" checkm))
+
(define-public umi-tools
(package
(name "umi-tools")
@@ -14490,3 +14583,155 @@ quantifying single-cell chromatin data, computing per-cell quality control
metrics, dimension reduction and normalization, visualization, and DNA
sequence motif analysis.")
(license license:expat))))
+
+(define-public tombo
+ (package
+ (name "tombo")
+ (version "1.5.1")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "ont-tombo" version))
+ (sha256
+ (base32
+ "1023hadgcsgi53kz53ql45207hfizf9sw57z0qij3ay1bx68zbpm"))))
+ (build-system python-build-system)
+ (native-inputs
+ `(("python-cython" ,python-cython)
+ ("python-nose2" ,python-nose2)))
+ ;; The package mainly consists of a command-line tool, but also has a
+ ;; Python-API. Thus these must be propagated.
+ (propagated-inputs
+ `(("python-future" ,python-future)
+ ("python-h5py" ,python-h5py)
+ ("python-mappy" ,python-mappy)
+ ("python-numpy" ,python-numpy)
+ ("python-scipy" ,python-scipy)
+ ("python-tqdm" ,python-tqdm)
+ ("python-rpy2" ,python-rpy2)))
+ (home-page "https://github.com/nanoporetech/tombo")
+ (synopsis "Analysis of raw nanopore sequencing data")
+ (description "Tombo is a suite of tools primarily for the identification of
+modified nucleotides from nanopore sequencing data. Tombo also provides tools
+for the analysis and visualization of raw nanopore signal.")
+ ;; Some parts may be BSD-3-licensed.
+ (license license:mpl2.0)))
+
+(define-public python-pyvcf
+ (package
+ (name "python-pyvcf")
+ (version "0.6.8")
+ ;; Use git, because the PyPI tarballs lack test data.
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/jamescasbon/PyVCF.git")
+ ;; Latest release is not tagged.
+ (commit "bfcedb9bad1a14074ac4526ffdb610611e073810")))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0c7lsssns3zp8fh2ibllzzra003srg9vbxqzmq6654akbzdb7lrf"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'install 'remove-installed-tests
+ ;; Do not install test files.
+ (lambda* (#:key inputs outputs #:allow-other-keys)
+ (delete-file-recursively (string-append
+ (site-packages inputs outputs)
+ "vcf/test"))
+ #t)))))
+ (native-inputs `(("python-cython" ,python-cython)))
+ (propagated-inputs
+ `(("python-pysam" ,python-pysam)
+ ("python-rpy2" ,python-rpy2)))
+ (home-page "https://github.com/jamescasbon/PyVCF")
+ (synopsis "Variant Call Format parser for Python")
+ (description "This package provides a @acronym{VCF,Variant Call Format}
+parser for Python.")
+ (license license:expat)))
+
+(define-public nanosv
+ (package
+ (name "nanosv")
+ (version "1.2.4")
+ (source (origin
+ (method url-fetch)
+ (uri (pypi-uri "NanoSV" version))
+ (sha256
+ (base32
+ "1wl2daj0bwrl8fx5xi8j8hfs3mp3vg3qycy66538n032v1qkc6xg"))))
+ (build-system python-build-system)
+ (inputs
+ `(("python-configparser" ,python-configparser)
+ ("python-pysam" ,python-pysam)
+ ("python-pyvcf" ,python-pyvcf)))
+ (home-page "https://github.com/mroosmalen/nanosv")
+ (synopsis "Structural variation detection tool for Oxford Nanopore data.")
+ (description "NanoSV is a software package that can be used to identify
+structural genomic variations in long-read sequencing data, such as data
+produced by Oxford Nanopore Technologies’ MinION, GridION or PromethION
+instruments, or Pacific Biosciences RSII or Sequel sequencers.")
+ (license license:expat)))
+
+(define-public r-ascat
+ (package
+ (name "r-ascat")
+ (version "2.5.2")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/Crick-CancerGenomics/ascat.git")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0cc0y3as6cb64iwnx0pgbajiig7m4z723mns9d5i4j09ccid3ccm"))))
+ (build-system r-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'move-to-ascat-dir
+ (lambda _
+ (chdir "ASCAT"))))))
+ (propagated-inputs
+ `(("r-rcolorbrewer" ,r-rcolorbrewer)))
+ (home-page "https://github.com/VanLoo-lab/ascat/")
+ (synopsis "Allele-Specific Copy Number Analysis of Tumors in R")
+ (description "This package provides the @acronym{ASCAT,Allele-Specific Copy
+Number Analysis of Tumors} R package that can be used to infer tumour purity,
+ploidy and allele-specific copy number profiles.")
+ (license license:gpl3)))
+
+(define-public r-battenberg
+ (package
+ (name "r-battenberg")
+ (version "2.2.9")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/Wedge-lab/battenberg.git")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0nmcq4c7y5g8h8lxsq9vadz9bj4qgqn118alip520ny6czaxki4h"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-devtools" ,r-devtools)
+ ("r-readr" ,r-readr)
+ ("r-doparallel" ,r-doparallel)
+ ("r-ggplot2" ,r-ggplot2)
+ ("r-rcolorbrewer" ,r-rcolorbrewer)
+ ("r-gridextra" ,r-gridextra)
+ ("r-gtools" ,r-gtools)
+ ("r-ascat" ,r-ascat)))
+ (home-page "https://github.com/Wedge-lab/battenberg")
+ (synopsis "Subclonal copy number estimation in R")
+ (description "This package contains the Battenberg R package for subclonal
+copy number estimation, as described by
+@url{doi:10.1016/j.cell.2012.04.023,Nik-Zainal et al.}")
+ (license license:gpl3)))