summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm301
1 files changed, 298 insertions, 3 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 7c573e1626..76a1c17737 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -40,6 +40,7 @@
#:use-module (gnu packages compression)
#:use-module (gnu packages cpio)
#:use-module (gnu packages file)
+ #:use-module (gnu packages gawk)
#:use-module (gnu packages java)
#:use-module (gnu packages linux)
#:use-module (gnu packages machine-learning)
@@ -1354,6 +1355,87 @@ supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
(license license:gpl3)))
+(define-public fraggenescan
+  (package
+    (name "fraggenescan")
+    (version "1.20")
+    (source
+     (origin
+       (method url-fetch)
+       (uri
+        (string-append "mirror://sourceforge/fraggenescan/"
+                       "FragGeneScan" version ".tar.gz"))
+       (sha256
+        (base32 "1zzigqmvqvjyqv4945kv6nc5ah2xxm1nxgrlsnbzav3f5c0n0pyj"))))
+    (build-system gnu-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         ;; The configure phase is dropped; the custom build phase below
+         ;; invokes make directly.
+         (delete 'configure)
+         (add-before 'build 'patch-paths
+           (lambda* (#:key outputs #:allow-other-keys)
+             ;; SHARE ends with a slash, so sub-paths are appended to it
+             ;; without a leading slash.
+             (let* ((out (assoc-ref outputs "out"))
+                    (share (string-append out "/share/fraggenescan/")))
+               (substitute* "run_FragGeneScan.pl"
+                 ;; Invoke rm and mv through the absolute file names that
+                 ;; `which' reports at build time.
+                 (("system\\(\"rm")
+                  (string-append "system(\"" (which "rm")))
+                 (("system\\(\"mv")
+                  (string-append "system(\"" (which "mv")))
+                 ;; This script and other programs expect the training files
+                 ;; to be in the non-standard location bin/train/XXX. Change
+                 ;; this to be share/fraggenescan/train/XXX instead.
+                 (("^\\$train.file = \\$dir.*")
+                  (string-append "$train_file = \""
+                                 share
+                                 "train/\".$FGS_train_file;")))
+               (substitute* "run_hmm.c"
+                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
+                  (string-append " strcpy(train_dir, \"" share "train/\");")))
+               (substitute* "post_process.pl"
+                 (("^my \\$dir = substr.*")
+                  (string-append "my $dir = \"" share "\";"))))
+             #t))
+         (replace 'build
+           (lambda _ (and (zero? (system* "make" "clean"))
+                          (zero? (system* "make" "fgs")))))
+         (replace 'install
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin/"))
+                    (share (string-append out "/share/fraggenescan/train")))
+               (install-file "run_FragGeneScan.pl" bin)
+               (install-file "FragGeneScan" bin)
+               (install-file "FGS_gff.py" bin)
+               (install-file "post_process.pl" bin)
+               (copy-recursively "train" share))
+             ;; Signal success explicitly rather than relying on the return
+             ;; value of `copy-recursively'.
+             #t))
+         (delete 'check)
+         (add-after 'install 'post-install-check
+           ;; In lieu of 'make check', run one of the examples and check that
+           ;; the output files get created.
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let* ((out (assoc-ref outputs "out"))
+                    (bin (string-append out "/bin/")))
+               (and (zero? (system* (string-append bin "run_FragGeneScan.pl")
+                                    "-genome=./example/NC_000913.fna"
+                                    "-out=./test2"
+                                    "-complete=1"
+                                    "-train=complete"))
+                    (file-exists? "test2.faa")
+                    (file-exists? "test2.ffn")
+                    (file-exists? "test2.gff")
+                    (file-exists? "test2.out"))))))))
+    (inputs
+     `(("perl" ,perl)
+       ("python" ,python-2))) ;not compatible with python 3.
+    (home-page "https://sourceforge.net/projects/fraggenescan/")
+    (synopsis "Finds potentially fragmented genes in short reads")
+    (description
+     "FragGeneScan is a program for predicting bacterial and archaeal genes in
+short and error-prone DNA sequencing reads. It can also be applied to predict
+genes in incomplete assemblies or complete genomes.")
+    ;; GPL3+ according to private correspondence with the authors.
+    (license license:gpl3+)))
+
(define-public grit
(package
(name "grit")
@@ -1690,7 +1772,7 @@ sequencing tag position and orientation.")
(define-public mafft
(package
(name "mafft")
- (version "7.221")
+ (version "7.267")
(source (origin
(method url-fetch)
(uri (string-append
@@ -1699,7 +1781,7 @@ sequencing tag position and orientation.")
(file-name (string-append name "-" version ".tgz"))
(sha256
(base32
- "0xi7klbsgi049vsrk6jiwh9wfj3b770gz3c8c7zwij448v0dr73d"))))
+ "1xl6xq1rfxkws0svrlhyqxhhwbv6r77jwblsdpcyiwzsscw6wlk0"))))
(build-system gnu-build-system)
(arguments
`(#:tests? #f ; no automated tests, though there are tests in the read me
@@ -1720,6 +1802,9 @@ sequencing tag position and orientation.")
;; remove mafft-homologs.rb from SCRIPTS
(("^SCRIPTS = mafft mafft-homologs.rb")
"SCRIPTS = mafft")
+ ;; remove mafft-homologs from MANPAGES
+ (("^MANPAGES = mafft.1 mafft-homologs.1")
+ "MANPAGES = mafft.1")
;; remove mafft-distance from PROGS
(("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
"PROGS = dvtditr dndfast7 dndblast sextet5")
@@ -1732,9 +1817,22 @@ sequencing tag position and orientation.")
(("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
#t))
+ (add-after 'enter-dir 'patch-paths
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* '("pairash.c"
+ "mafft.tmpl")
+ (("perl") (which "perl"))
+ (("([\"`| ])awk" _ prefix)
+ (string-append prefix (which "awk")))
+ (("grep") (which "grep")))
+ #t))
(delete 'configure))))
(inputs
- `(("perl" ,perl)))
+ `(("perl" ,perl)
+ ("gawk" ,gawk)
+ ("grep" ,grep)))
+ (propagated-inputs
+ `(("coreutils" ,coreutils)))
(home-page "http://mafft.cbrc.jp/alignment/software/")
(synopsis "Multiple sequence alignment program")
(description
@@ -3123,6 +3221,203 @@ BLAST, KEGG, GenBank, MEDLINE and GO.")
;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
(license (list license:ruby license:lgpl2.1+ license:gpl2+ ))))
+;; ACSNMineR, fetched from CRAN and built with the R build system.
+(define-public r-acsnminer
+  (package
+    (name "r-acsnminer")
+    (version "0.15.11")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cran-uri "ACSNMineR" version))
+       (sha256
+        (base32 "1dl4drhjyazwm9wxlm8yfppwvvj4h6jxwmz8kfw5bxpb3jdnsqvy"))))
+    ;; Record the upstream (mixed-case) spelling of the package name.
+    (properties
+     `((upstream-name . "ACSNMineR")))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-ggplot2" ,r-ggplot2)
+       ("r-gridextra" ,r-gridextra)))
+    (home-page "http://cran.r-project.org/web/packages/ACSNMineR")
+    (synopsis "Gene enrichment analysis")
+    (description
+     "This package provides tools to compute and represent gene set enrichment
+or depletion from your data based on pre-saved maps from the @dfn{Atlas of
+Cancer Signalling Networks} (ACSN) or user imported maps. The gene set
+enrichment can be run with hypergeometric test or Fisher exact test, and can
+use multiple corrections. Visualization of data can be done either by
+barplots or heatmaps.")
+    (license license:gpl2+)))
+
+;; BiocGenerics, fetched from Bioconductor; it declares no R package inputs
+;; of its own.
+(define-public r-biocgenerics
+  (package
+    (name "r-biocgenerics")
+    (version "0.16.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (bioconductor-uri "BiocGenerics" version))
+       (sha256
+        (base32 "0f16ryy5f012hvksrwlmm33bcl7lw97i2jvhbnwfwl03j4w7nhc1"))))
+    ;; Record the upstream spelling of the name and the repository it
+    ;; comes from.
+    (properties
+     `((upstream-name . "BiocGenerics")
+       (r-repository . bioconductor)))
+    (build-system r-build-system)
+    (home-page "http://bioconductor.org/packages/BiocGenerics")
+    (synopsis "S4 generic functions for Bioconductor")
+    (description
+     "This package provides S4 generic functions needed by many Bioconductor
+packages.")
+    (license license:artistic2.0)))
+
+;; S4Vectors, fetched from Bioconductor; propagates r-biocgenerics.
+(define-public r-s4vectors
+  (package
+    (name "r-s4vectors")
+    (version "0.8.5")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (bioconductor-uri "S4Vectors" version))
+       (sha256
+        (base32 "10f4jxwlwsiy7zhb3kgp6anid0d7wkvrrljl80r3nhx38yr24l5k"))))
+    ;; Record the upstream spelling of the name and the repository it
+    ;; comes from.
+    (properties
+     `((upstream-name . "S4Vectors")
+       (r-repository . bioconductor)))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-biocgenerics" ,r-biocgenerics)))
+    (home-page "http://bioconductor.org/packages/S4Vectors")
+    (synopsis "S4 implementation of vectors and lists")
+    (description
+     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
+classes and a set of generic functions that extend the semantic of ordinary
+vectors and lists in R. Package developers can easily implement vector-like
+or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
+In addition, a few low-level concrete subclasses of general interest (e.g.
+@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
+S4Vectors package itself.")
+    (license license:artistic2.0)))
+
+;; IRanges, fetched from Bioconductor; propagates r-biocgenerics and
+;; r-s4vectors.
+(define-public r-iranges
+  (package
+    (name "r-iranges")
+    (version "2.4.6")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (bioconductor-uri "IRanges" version))
+       (sha256
+        (base32 "00x0266sys1fc5ipa639y84p6m6mgspk2xb099vcwmd3w4hypj9d"))))
+    ;; Record the upstream spelling of the name and the repository it
+    ;; comes from.
+    (properties
+     `((upstream-name . "IRanges")
+       (r-repository . bioconductor)))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-biocgenerics" ,r-biocgenerics)
+       ("r-s4vectors" ,r-s4vectors)))
+    (home-page "http://bioconductor.org/packages/IRanges")
+    (synopsis "Infrastructure for manipulating intervals on sequences")
+    (description
+     "This package provides efficient low-level and highly reusable S4 classes
+for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
+generally, data that can be organized sequentially (formally defined as
+@code{Vector} objects), as well as views on these @code{Vector} objects.
+Efficient list-like classes are also provided for storing big collections of
+instances of the basic classes. All classes in the package use consistent
+naming and share the same rich and consistent \"Vector API\" as much as
+possible.")
+    (license license:artistic2.0)))
+
+;; GenomeInfoDb, fetched from Bioconductor; propagates r-biocgenerics,
+;; r-iranges and r-s4vectors.
+(define-public r-genomeinfodb
+  (package
+    (name "r-genomeinfodb")
+    (version "1.6.1")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (bioconductor-uri "GenomeInfoDb" version))
+       (sha256
+        (base32 "1j2n1v1mrw1fxn7cyffz112pm76wd6gy9q9qwlsfv3brbsqbvdbf"))))
+    ;; Record the upstream spelling of the name and the repository it
+    ;; comes from.
+    (properties
+     `((upstream-name . "GenomeInfoDb")
+       (r-repository . bioconductor)))
+    (build-system r-build-system)
+    (propagated-inputs
+     `(("r-biocgenerics" ,r-biocgenerics)
+       ("r-iranges" ,r-iranges)
+       ("r-s4vectors" ,r-s4vectors)))
+    (home-page "http://bioconductor.org/packages/GenomeInfoDb")
+    (synopsis "Utilities for manipulating chromosome identifiers")
+    (description
+     "This package contains data and functions that define and allow
+translation between different chromosome sequence naming conventions (e.g.,
+\"chr1\" versus \"1\"), including a function that attempts to place sequence
+names in their natural, rather than lexicographic, order.")
+    (license license:artistic2.0)))
+
+;; XVector, fetched from Bioconductor.  References to zlibbioc are removed
+;; from the package metadata and zlib is supplied as an input.
+(define-public r-xvector
+  (package
+    (name "r-xvector")
+    (version "0.10.0")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "XVector" version))
+              (sha256
+               (base32
+                "0havwyr6xqk7w0rmbwfj9jq1djz7wzdz7w39adhklwzwz9l4ih3a"))))
+    (properties
+     `((upstream-name . "XVector")
+       (r-repository . bioconductor)))
+    (build-system r-build-system)
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'use-system-zlib
+           (lambda _
+             ;; Remove the zlibbioc entry from DESCRIPTION ...
+             (substitute* "DESCRIPTION"
+               (("zlibbioc, ") ""))
+             ;; ... and the matching import directive from NAMESPACE.
+             (substitute* "NAMESPACE"
+               (("import\\(zlibbioc\\)") ""))
+             #t)))))
+    (inputs
+     `(("zlib" ,zlib)))
+    (propagated-inputs
+     `(("r-biocgenerics" ,r-biocgenerics)
+       ("r-iranges" ,r-iranges)
+       ("r-s4vectors" ,r-s4vectors)))
+    (home-page "http://bioconductor.org/packages/XVector")
+    (synopsis "Representation and manipulation of external sequences")
+    (description
+     "This package provides memory efficient S4 classes for storing sequences
+\"externally\" (behind an R external pointer, or on disk).")
+    (license license:artistic2.0)))
+
+;; GenomicRanges, fetched from Bioconductor.
+(define-public r-genomicranges
+  (package
+    (name "r-genomicranges")
+    (version "1.22.2")
+    (source (origin
+              (method url-fetch)
+              (uri (bioconductor-uri "GenomicRanges" version))
+              (sha256
+               (base32
+                "1jffvcs0jsi7q4l3pvjj6r73vll80csgkljvhqp0g2ixc43jjng9"))))
+    (properties
+     `((upstream-name . "GenomicRanges")
+       (r-repository . bioconductor)))
+    (build-system r-build-system)
+    ;; r-iranges and r-s4vectors are listed explicitly instead of being
+    ;; picked up only through the propagated inputs of r-genomeinfodb and
+    ;; r-xvector.
+    ;; NOTE(review): believed to be direct upstream dependencies -- confirm
+    ;; against the DESCRIPTION file of this release.
+    (propagated-inputs
+     `(("r-biocgenerics" ,r-biocgenerics)
+       ("r-genomeinfodb" ,r-genomeinfodb)
+       ("r-iranges" ,r-iranges)
+       ("r-s4vectors" ,r-s4vectors)
+       ("r-xvector" ,r-xvector)))
+    (home-page "http://bioconductor.org/packages/GenomicRanges")
+    (synopsis "Representation and manipulation of genomic intervals")
+    (description
+     "This package provides tools to efficiently represent and manipulate
+genomic annotations and alignments, which play a central role when it comes
+to analyzing high-throughput sequencing data (a.k.a. NGS data). The
+GenomicRanges package defines general purpose containers for storing and
+manipulating genomic intervals and variables defined along a genome.")
+    (license license:artistic2.0)))
+
(define-public r-qtl
(package
(name "r-qtl")