summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm369
1 files changed, 364 insertions, 5 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 229fd34d2a..3ddb91714a 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -106,6 +106,7 @@
#:use-module (gnu packages java-compression)
#:use-module (gnu packages jemalloc)
#:use-module (gnu packages jupyter)
+ #:use-module (gnu packages libffi)
#:use-module (gnu packages linux)
#:use-module (gnu packages logging)
#:use-module (gnu packages lsof)
@@ -1216,14 +1217,14 @@ sequencing.")
(define-public python-biopython
(package
(name "python-biopython")
- (version "1.73")
+ (version "1.76")
(source (origin
(method url-fetch)
;; use PyPi rather than biopython.org to ease updating
(uri (pypi-uri "biopython" version))
(sha256
(base32
- "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))
+ "0wlch9xpa0fpgjzyxi6jsfca6iakaq9a05927xg8vqnmvaccnwrq"))))
(build-system python-build-system)
(arguments
`(#:phases
@@ -1244,6 +1245,18 @@ dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
(license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
+(define-public python-biopython-1.73 ; older release kept next to python-biopython (1.76); used as an input of instrain
+ (package
+ (inherit python-biopython) ; fields not overridden below are taken from python-biopython
+ (version "1.73")
+ (source (origin
+ (method url-fetch)
+ ;; Use PyPI rather than biopython.org to ease updating.
+ (uri (pypi-uri "biopython" version))
+ (sha256
+ (base32
+ "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))))
+
(define-public python2-biopython ; variant of python-biopython produced by package-with-python2
(package-with-python2 python-biopython))
@@ -1270,6 +1283,133 @@ relying on a complex dependency tree.")
(define-public python2-fastalite ; variant of python-fastalite produced by package-with-python2
(package-with-python2 python-fastalite))
+(define-public ciri-long ; CIRI-long, built from the upstream git tag "v<version>"
+ (package
+ (name "ciri-long")
+ (version "1.0.2")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/bioinfo-biols/CIRI-long")
+ (commit (string-append "v" version))))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "10k88i1fcqchrrjv82rmylwvbwqfba0n51palhig9hsg71xs0dbi"))
+ ;; Delete the bundled binary "libs/ccs" from the source tree.
+ (snippet '(delete-file "libs/ccs"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'relax-requirements ; loosen the requirements declared in setup.py
+ (lambda _
+ (substitute* "setup.py"
+ (("'argparse[^']*',") "") ; only for python2
+ (("==") ">=")))) ; turn exact version pins into lower bounds
+ (add-before 'build 'build-libssw ; build libssw.so from the bundled sources before the Python build
+ (lambda _
+ (with-directory-excursion "libs/striped_smith_waterman"
+ (invoke "make" "libssw.so")))))))
+ (inputs
+ `(("python-biopython" ,python-biopython)
+ ("python-bwapy" ,python-bwapy)
+ ("python-cython" ,python-cython)
+ ("python-levenshtein" ,python-levenshtein)
+ ("python-mappy" ,python-mappy)
+ ("python-numpy" ,python-numpy)
+ ("python-pandas" ,python-pandas)
+ ("python-pysam" ,python-pysam)
+ ("python-pyspoa" ,python-pyspoa)
+ ("python-scikit-learn" ,python-scikit-learn)
+ ("python-scipy" ,python-scipy)))
+ (native-inputs
+ `(("python-nose" ,python-nose)
+ ("python-setuptools" ,python-setuptools)))
+ (home-page "https://ciri-cookbook.readthedocs.io/")
+ (synopsis "Circular RNA identification for Nanopore sequencing")
+ (description "CIRI-long is a package for circular RNA identification using
+long-read sequencing data.")
+ (license license:expat)))
+
+(define-public qtltools ; QTLtools, built from the upstream source tarball with its plain Makefile
+ (package
+ (name "qtltools")
+ (version "1.3.1")
+ (source (origin
+ (method url-fetch/tarbomb)
+ (uri (string-append "https://qtltools.github.io/qtltools/"
+ "binaries/QTLtools_" version
+ "_source.tar.gz"))
+ (sha256
+ (base32
+ "13gdry5l43abn3464fmk8qzrxgxnxah2612r66p9dzhhl92j30cd"))))
+ (build-system gnu-build-system)
+ (arguments
+ `(#:tests? #f ; no tests included
+ #:make-flags ; point the *_INC / *_LIB make variables at the boost, htslib and rmath-standalone inputs
+ (list (string-append "BOOST_INC="
+ (assoc-ref %build-inputs "boost") "/include")
+ (string-append "BOOST_LIB="
+ (assoc-ref %build-inputs "boost") "/lib")
+ (string-append "HTSLD_INC="
+ (assoc-ref %build-inputs "htslib") "/include")
+ (string-append "HTSLD_LIB="
+ (assoc-ref %build-inputs "htslib") "/lib")
+ (string-append "RMATH_INC="
+ (assoc-ref %build-inputs "rmath-standalone")
+ "/include")
+ (string-append "RMATH_LIB="
+ (assoc-ref %build-inputs "rmath-standalone")
+ "/lib"))
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'fix-linkage ; use the shared boost libraries and openblas instead of static boost archives and blas
+ (lambda _
+ (substitute* "qtltools/Makefile"
+ (("libboost_iostreams.a")
+ "libboost_iostreams.so")
+ (("libboost_program_options.a")
+ "libboost_program_options.so")
+ (("-lblas") "-lopenblas"))))
+ (add-before 'build 'chdir ; the build runs from inside the "qtltools" subdirectory
+ (lambda _ (chdir "qtltools")))
+ (replace 'configure ; instead of running a configure script, patch the Makefile's link settings
+ (lambda _
+ (substitute* "qtltools/Makefile" ; NOTE(review): 'fix-linkage already rewrote libboost_*.a, so the LIB_FILES pattern below may never match -- confirm
+ (("LIB_FLAGS=-lz")
+ "LIB_FLAGS=-lz -lcrypto -lssl")
+ (("LIB_FILES=\\$\\(RMATH_LIB\\)/libRmath.a \
+\\$\\(HTSLD_LIB\\)/libhts.a \
+\\$\\(BOOST_LIB\\)/libboost_iostreams.a \
+\\$\\(BOOST_LIB\\)/libboost_program_options.a")
+ "LIB_FILES=$(RMATH_LIB)/libRmath.so \
+$(HTSLD_LIB)/libhts.so \
+$(BOOST_LIB)/libboost_iostreams.so \
+$(BOOST_LIB)/libboost_program_options.so"))))
+ (replace 'install ; copy only the bin/QTLtools executable into <out>/bin
+ (lambda* (#:key outputs #:allow-other-keys)
+ (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
+ (mkdir-p bin)
+ (install-file "bin/QTLtools" bin)))))))
+ (inputs
+ `(("curl" ,curl)
+ ("gsl" ,gsl)
+ ("boost" ,boost)
+ ("rmath-standalone" ,rmath-standalone)
+ ("htslib" ,htslib-1.3) ; bound to the htslib-1.3 variable, under the label "htslib" used by #:make-flags
+ ("openssl" ,openssl)
+ ("openblas" ,openblas)
+ ("zlib" ,zlib)))
+ (home-page "https://qtltools.github.io/qtltools/")
+ (synopsis "Tool set for molecular QTL discovery and analysis")
+ (description "QTLtools is a tool set for molecular QTL discovery
+and analysis. It allows to go from the raw genetic sequence data to
+collection of molecular @dfn{Quantitative Trait Loci} (QTLs) in few
+easy-to-perform steps.")
+ (license license:gpl3+)))
+
(define-public bpp-core
;; The last release was in 2014 and the recommended way to install from source
;; is to clone the git repository, so we do this.
@@ -7448,6 +7588,33 @@ clustering analysis, differential analysis, motif inference and exploration of
single cell ATAC-seq sequencing data.")
(license license:gpl3)))
+(define-public r-umi4cpackage ; R package umi4cPackage, pinned to an upstream git commit
+ (let ((commit "88b07d896a137418ba6c31c2474b9dbe1d86fc20")
+ (revision "1"))
+ (package
+ (name "r-umi4cpackage")
+ (version (git-version "0.0.1" revision commit)) ; version string derived from base version, revision and commit
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/tanaylab/umi4cpackage")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "0bjzamdw2lcfhlbzc0vdva87c3wwnij8jsvnrpx4wyyxvpcz13m5"))))
+ (properties `((upstream-name . "umi4cPackage"))) ; upstream spells the package name in mixed case
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-misha" ,r-misha)
+ ("r-zoo" ,r-zoo)))
+ (native-inputs `(("r-knitr" ,r-knitr)))
+ (home-page "https://github.com/tanaylab/umi4cpackage")
+ (synopsis "Processing and analysis of UMI-4C contact profiles") ; no trailing period, per the Guix synopsis convention
+ (description "This is a package that lets you process UMI-4C data from
+scratch to produce nice plots.")
+ (license license:expat))))
+
(define-public r-shinycell
(let ((commit
"aecbd56e66802f28e397f5ae1f19403aadd12163")
@@ -7576,6 +7743,86 @@ of transcriptional heterogeneity among single cells.")
;; See https://github.com/hms-dbmi/scde/issues/38
(license license:gpl2)))
+(define-public r-misha ; R package misha, built from the upstream git tag named by the version
+ (package
+ (name "r-misha")
+ (version "4.1.0")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/tanaylab/misha")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0bgivx3lzjh3173jsfrhb5kvhjsn53br0n4hmyx7i3dwy2cnnp2p"))
+ ;; Delete the bundled executable; the kentutils input supplies bigWigToWig.
+ (snippet
+ '(delete-file "exec/bigWigToWig"))))
+ (build-system r-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'do-not-use-bundled-bigWigToWig ; make R/misha.R refer to bigWigToWig from the kentutils input
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* "R/misha.R"
+ (("get\\(\".GLIBDIR\"\\), \"/exec/bigWigToWig")
+ (string-append "\""
+ (assoc-ref inputs "kentutils")
+ "/bin/bigWigToWig"))))))))
+ (inputs
+ `(("kentutils" ,kentutils))) ; looked up by its "kentutils" label in the phase above
+ (home-page "https://github.com/tanaylab/misha")
+ (synopsis "Toolkit for analysis of genomic data")
+ (description "This package is intended to help users to efficiently
+analyze genomic data resulting from various experiments.")
+ (license license:gpl2)))
+
+(define-public r-shaman ; R package shaman, pinned to an upstream git commit
+ (let ((commit "d6944e8ac7bd1dbd5c6cec646eafc1d19d0ca96f")
+ (release "2.0")
+ (revision "2"))
+ (package
+ (name "r-shaman")
+ (version (git-version release revision commit)) ; version string derived from release, revision and commit
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/tanaylab/shaman")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "03sx138dzpfiq23j49z0m0s4j79855mrg64hpj9c83408wzphxi6"))
+ (snippet
+ ;; This file will be generated, so the copy shipped in the source is removed.
+ '(delete-file "inst/doc/shaman-package.R"))))
+ (build-system r-build-system)
+ (propagated-inputs
+ `(("r-data-table" ,r-data-table)
+ ("r-domc" ,r-domc)
+ ("r-ggplot2" ,r-ggplot2)
+ ("r-gviz" ,r-gviz)
+ ("r-misha" ,r-misha)
+ ("r-plyr" ,r-plyr)
+ ("r-rann" ,r-rann)
+ ("r-rcpp" ,r-rcpp)
+ ("r-reshape2" ,r-reshape2)
+ ;; For vignettes
+ ("r-rmarkdown" ,r-rmarkdown)
+ ("r-knitr" ,r-knitr)))
+ (home-page "https://github.com/tanaylab/shaman")
+ (synopsis "Sampling HiC contact matrices for a-parametric normalization")
+ (description "The Shaman package implements functions for
+resampling Hi-C matrices in order to generate expected contact
+distributions given constraints on marginal coverage and
+contact-distance probability distributions. The package also provides
+support for visualizing normalized matrices and statistical analysis
+of contact distributions around selected landmarks.")
+ ;; Any version of the GPL.  NOTE(review): gpl3+ only covers version 3 or later; confirm whether an "any version" license value is meant.
+ (license license:gpl3+))))
+
(define-public r-centipede
(package
(name "r-centipede")
@@ -7639,6 +7886,46 @@ includes software to
")
(license license:cc0))))
+(define-public gdc-client ; GDC data transfer tool, built from the upstream git ref named by the version
+ (package
+ (name "gdc-client")
+ (version "1.6.0")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/NCI-GDC/gdc-client.git")
+ (commit version))) ; the git ref is the bare version string, without a "v" prefix
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "0cagawlzjwj3wam10lv64xgbfx4zcnzxi5sjpsdhq7rn4z24mzc2"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'relax-requirements ; loosen the pins in requirements.txt
+ (lambda _
+ (substitute* "requirements.txt"
+ (("==") ">="))))))) ; turn exact version pins into lower bounds
+ (inputs
+ `(("python-cryptography" ,python-cryptography)
+ ("python-intervaltree" ,python-intervaltree)
+ ("python-jsonschema" ,python-jsonschema)
+ ("python-lxml" ,python-lxml)
+ ("python-ndg-httpsclient" ,python-ndg-httpsclient)
+ ("python-progressbar2" ,python-progressbar2)
+ ("python-pyasn1" ,python-pyasn1)
+ ("python-pyopenssl" ,python-pyopenssl)
+ ("python-pyyaml" ,python-pyyaml)
+ ("python-requests" ,python-requests)
+ ("python-termcolor" ,python-termcolor)))
+ (home-page "https://gdc.nci.nih.gov/access-data/gdc-data-transfer-tool")
+ (synopsis "GDC data transfer tool")
+ (description "The gdc-client provides several convenience functions over
+the GDC API which provides general download/upload via HTTPS.")
+ (license license:asl2.0)))
+
(define-public vsearch
(package
(name "vsearch")
@@ -11642,14 +11929,14 @@ set.")
(define-public instrain
(package
(name "instrain")
- (version "1.5.2")
+ (version "1.5.4")
(source
(origin
(method url-fetch)
(uri (pypi-uri "inStrain" version))
(sha256
(base32
- "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0"))))
+ "05w1lw75x4lwkzg4qpi055g7hdjp9rnc4ksbxg2hfgksq9djk0hx"))))
(build-system python-build-system)
(arguments
`(#:phases
@@ -11662,7 +11949,7 @@ set.")
(("from job_utils")
"from .job_utils")))))))
(inputs
- `(("python-biopython" ,python-biopython)
+ `(("python-biopython" ,python-biopython-1.73)
("python-boto3" ,python-boto3)
("python-h5py" ,python-h5py)
("python-lmfit" ,python-lmfit)
@@ -14995,6 +15282,78 @@ line, interactively explore genomic data within Jupyter environment or web
browser.")
(license license:gpl3+)))
+(define-public python-pyspoa ; Python bindings for spoa, built from the upstream git tag "v<version>"
+ (package
+ (name "python-pyspoa")
+ (version "0.0.5")
+ (source
+ (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/nanoporetech/pyspoa")
+ (commit (string-append "v" version))
+ (recursive? #true))) ; recursive checkout, i.e. including git submodules
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1lgf2shzhxkcsircd6vy46h27pjljd5q95fyz1cm3lkk702qbnzx"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (add-before 'build 'build-libspoa ; configure and build the sources under src/ with cmake and make before the Python build
+ (lambda _
+ (mkdir-p "src/build")
+ (with-directory-excursion "src/build"
+ (invoke "cmake"
+ "-Dspoa_optimize_for_portability=ON"
+ "-DCMAKE_BUILD_TYPE=Release"
+ "-DCMAKE_CXX_FLAGS=\"-I ../vendor/cereal/include/\" -fPIC"
+ "..")
+ (invoke "make"))))
+ (replace 'check ; run tests/test_pyspoa.py directly, unless tests are disabled
+ (lambda* (#:key inputs outputs tests? #:allow-other-keys)
+ (when tests?
+ (add-installed-pythonpath inputs outputs) ; presumably makes the installed module importable -- confirm
+ (invoke "python" "tests/test_pyspoa.py")))))))
+ (propagated-inputs
+ `(("pybind11" ,pybind11)))
+ (native-inputs
+ `(("cmake" ,cmake-minimal)))
+ (home-page "https://github.com/nanoporetech/pyspoa")
+ (synopsis "Python bindings for the SIMD partial order alignment library") ; no trailing whitespace in the synopsis
+ (description
+ "This package provides Python bindings for spoa, a C++ implementation of
+the @dfn{partial order alignment} (POA) algorithm (as described in
+10.1093/bioinformatics/18.3.452) which is used to generate consensus
+sequences.")
+ (license license:expat)))
+
+(define-public python-bwapy ; Python bindings for bwa, fetched from PyPI
+ (package
+ (name "python-bwapy")
+ (version "0.1.4")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "bwapy" version))
+ (sha256
+ (base32 "090qwx3vl729zn3a7sksbviyg04kc71gpbm3nd8dalqp673x1npw"))))
+ (build-system python-build-system)
+ (propagated-inputs
+ `(("python-cffi" ,python-cffi)
+ ("python-setuptools" ,python-setuptools)
+ ("python-wheel" ,python-wheel)))
+ (inputs
+ `(("zlib" ,zlib)))
+ (home-page "https://github.com/ACEnglish/bwapy")
+ (synopsis "Python bindings to bwa aligner") ; spelling fixed: "aligner"
+ (description "This package provides Python bindings to the bwa mem
+aligner.")
+ ;; These Python bindings are licensed under Mozilla Public License 2.0,
+ ;; bwa itself is licensed under GNU General Public License v3.0.
+ (license license:mpl2.0)))
+
(define-public scregseg
(package
(name "scregseg")