[med-svn] r18596 - in trunk/packages/vsearch/trunk/debian: . tests

Tue Jan 20 11:05:46 UTC 2015

Author: tille
Date: 2015-01-20 11:05:45 +0000 (Tue, 20 Jan 2015)
New Revision: 18596

Removed:
   trunk/packages/vsearch/trunk/debian/createmanpages
   trunk/packages/vsearch/trunk/debian/vsearch.1
Modified:
   trunk/packages/vsearch/trunk/debian/README.source
   trunk/packages/vsearch/trunk/debian/copyright
   trunk/packages/vsearch/trunk/debian/rules
   trunk/packages/vsearch/trunk/debian/tests/run-unit-test
   trunk/packages/vsearch/trunk/debian/vsearch-data.install
   trunk/packages/vsearch/trunk/debian/vsearch.docs
   trunk/packages/vsearch/trunk/debian/vsearch.manpages
Log:
Remove uncompressed copies of large data files from the source tarball and the data package, and recreate these files instead whenever the test suite is run; use upstream manpage


Modified: trunk/packages/vsearch/trunk/debian/README.source
===================================================================

--- trunk/packages/vsearch/trunk/debian/README.source	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/README.source	2015-01-20 11:05:45 UTC (rev 18596)
@@ -3,4 +3,16 @@
 I'm sure they'll sort this at some point, but for now we have to compile the
 whole thing three times.
 
-Tim Booth
+ -- Tim Booth <tbooth at ceh.ac.uk>  Fri, 09 Jan 2015 17:13:35 +0000
+
+
+The file data/BioMarKs.fsa.gz has the very same content as
+data/BioMarKs.fsa.bz2 but just a different compression method.
+To save disk space the former is deleted from source tarball and
+recreated in the test target where it is needed.
+
+The same is true for data/PR2-18S-rRNA-V4.fsa which is just an
+uncompressed copy of data/PR2-18S-rRNA-V4.fsa.bz2
+
+ -- Andreas Tille <tille at debian.org>  Wed, 14 Jan 2015 10:40:22 +0000
+

Modified: trunk/packages/vsearch/trunk/debian/copyright
===================================================================
--- trunk/packages/vsearch/trunk/debian/copyright	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/copyright	2015-01-20 11:05:45 UTC (rev 18596)
@@ -3,6 +3,14 @@
 Upstream-Contact: Torbjørn Rognes
 Source: https://github.com/torognes/vsearch/
 Files-Excluded: bin/*
+                data/BioMarKs.fsa.gz
+                data/PR2-18S-rRNA-V4.fsa
+Comment: data/BioMarKs.fsa.gz has the very same content as
+         data/BioMarKs.fsa.bz2 but just a different compression method.
+         To save disk space the former is deleted from source tarball and
+         recreated in the test target where it is needed.
+         The same is true for data/PR2-18S-rRNA-V4.fsa which is just
+         an uncompressed copy of data/PR2-18S-rRNA-V4.fsa.bz2
 
 Files: *
 Copyright: © 2014-2015 Torbjorn Rognes & Tomas Flouri

Deleted: trunk/packages/vsearch/trunk/debian/createmanpages
===================================================================
--- trunk/packages/vsearch/trunk/debian/createmanpages	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/createmanpages	2015-01-20 11:05:45 UTC (rev 18596)
@@ -1,16 +0,0 @@
-#!/bin/sh
-MANDIR=debian
-mkdir -p $MANDIR
-
-VERSION=`dpkg-parsechangelog | awk '/^Version:/ {print $2}' | sed -e 's/^[0-9]*://' -e 's/-.*//' -e 's/[+~]dfsg$//'`
-
-help2man --no-info --no-discard-stderr \
-         --name='tool for processing metagenomic sequences' \
-            --version-string="$VERSION" vsearch > $MANDIR/vsearch.1
-
-cat <<EOT
-Please enhance the help2man output.
-The following web page might be helpful in doing so:
-    http://liw.fi/manpages/
-EOT
-

Modified: trunk/packages/vsearch/trunk/debian/rules
===================================================================
--- trunk/packages/vsearch/trunk/debian/rules	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/rules	2015-01-20 11:05:45 UTC (rev 18596)
@@ -4,6 +4,7 @@
 #export DH_VERBOSE=1
 
 pkg := $(shell dpkg-parsechangelog | sed -n 's/^Source: //p')
+testdir := $(CURDIR)/debian/$(pkg)-data/usr/share/doc/$(pkg)/tests
 
 %:
 	dh $@ --sourcedirectory=src
@@ -16,30 +17,39 @@
 	dh_auto_clean
 	dh_auto_build
 	markdown_py -f README.html README.md
+	markdown_py -f data/README.html data/README.md
 
 override_dh_auto_clean:
 	dh_auto_clean
 	rm -f src/vsearch-gz src/vsearch-bz
 
 override_dh_auto_test:
+	# restore data files from original tarball as well as those files needed for the full testsuite
+	cd data && bunzip2 --keep BioMarKs.fsa.bz2 && gzip --keep --rsyncable BioMarKs.fsa
+	cd data && bunzip2 --keep PR2-18S-rRNA-V4.fsa.bz2 && gzip --keep --rsyncable PR2-18S-rRNA-V4.fsa
 	cd data && tar -xvzf simm.tar.gz
 	cd test && \
 	for t in *.sh ; do \
 	    bash $$t v ; \
 	done
+	rm -f data/BioMarKs.fsa data/BioMarKs.fsa.gz \
+	      data/PR2-18S-rRNA-V4.fsa data/PR2-18S-rRNA-V4.fsa.gz
 
 override_dh_clean:
 	dh_clean
 	rm -rf data/simm
 	find test -mindepth 1 -not -name '*.sh' -delete
-	rm -f README.html
+	rm -f README.html data/README.html
 	rm -f *.out
 
 override_dh_install:
 	dh_install
 	# tweak path tp vsearch binary in test scripts
-	mkdir -p $(CURDIR)/debian/$(pkg)-data/usr/share/doc/$(pkg)/test
-	for tst in test/*.sh ; do sed 's?\.\./src/vsearch?/usr/bin/vsearch?' $${tst} > $(CURDIR)/debian/$(pkg)-data/usr/share/doc/$(pkg)/$${tst} ; done
+	mkdir -p $(testdir)/eval
+	for tst in eval/*.sh ; do sed 's?\.\./src/vsearch?/usr/bin/vsearch?' $${tst} > $(testdir)/$${tst} ; done
+	sed -i 's?#\(VSEARCH=/usr/bin/vsearch\)?\1?' $(testdir)/eval/eval.sh
+	mkdir -p $(testdir)/test
+	for tst in test/*.sh ; do sed 's?\.\./src/vsearch?/usr/bin/vsearch?' $${tst} > $(testdir)/$${tst} ; done
 
 get-orig-source:
 	uscan --verbose --force-download --repack --compression xz

Modified: trunk/packages/vsearch/trunk/debian/tests/run-unit-test
===================================================================
--- trunk/packages/vsearch/trunk/debian/tests/run-unit-test	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/tests/run-unit-test	2015-01-20 11:05:45 UTC (rev 18596)
@@ -2,18 +2,20 @@
 
 pkg=vsearch
 if [ "$ADTTMP" = "" ] ; then
-  ADTTMP=`mktemp -d /tmp/${pkg}-test.XXXXXX`
+  ADTTMP=`mktemp -d /var/tmp/${pkg}-test.XXXXXX`
 fi
 cd $ADTTMP
 mkdir -p data
 cd data
 tar xaf /usr/share/doc/${pkg}/examples/simm.tar.gz
 cp -a /usr/share/doc/${pkg}/examples/[A-Z]* .
-gunzip BioMarKs50k* [PR]*.gz
 cd ..
-cp -a /usr/share/doc/${pkg}/test .
-cd test
+cp -a /usr/share/doc/${pkg}/tests/eval .
+cp -a /usr/share/doc/${pkg}/tests/test .
 find . -type f -name "*.gz" -exec gunzip \{\} \;
+cd data && bunzip2 --keep BioMarKs.fsa.bz2 && gzip --keep --rsyncable BioMarKs.fsa && \
+           bunzip2 --keep PR2-18S-rRNA-V4.fsa.bz2 && gzip --keep --rsyncable PR2-18S-rRNA-V4.fsa
+cd ../test
 for t in *.sh ; do \
     bash $t v ; \
 done

Modified: trunk/packages/vsearch/trunk/debian/vsearch-data.install
===================================================================
--- trunk/packages/vsearch/trunk/debian/vsearch-data.install	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/vsearch-data.install	2015-01-20 11:05:45 UTC (rev 18596)
@@ -4,3 +4,6 @@
 data/PR2-18S-rRNA-V4.fsa*	usr/share/doc/vsearch/examples
 data/Rfam_11_0.*		usr/share/doc/vsearch/examples
 data/Rfam_9_1.fasta		usr/share/doc/vsearch/examples
+data/README.html		usr/share/doc/vsearch/examples
+eval/*.pl			usr/share/doc/vsearch/tests/eval
+eval/*.txt			usr/share/doc/vsearch/tests/eval

Deleted: trunk/packages/vsearch/trunk/debian/vsearch.1
===================================================================
--- trunk/packages/vsearch/trunk/debian/vsearch.1	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/vsearch.1	2015-01-20 11:05:45 UTC (rev 18596)
@@ -1,396 +0,0 @@
-.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.46.2.
-.TH VSEARCH "1" "January 2015" "vsearch 1.0.7" "User Commands"
-.SH NAME
-vsearch \- tool for processing metagenomic sequences
-.SH SYNOPSIS
-.B vsearch
-[\fI\,OPTIONS\/\fR]
-.SH DESCRIPTION
-.PP
-64-bit multithreaded tool for processing metagenomic sequences, including
-searching, clustering, chimera detection, dereplication, sorting, masking
-and shuffling
-.PP
-The aim of this project is to create an alternative to the USEARCH tool
-developed by Robert C. Edgar (2010).
-.SH OPTIONS
-.TP
-\fB\-\-help\fR
-display help information
-.TP
-\fB\-\-version\fR
-display version information
-.TP
-\fB\-\-fasta_width\fR INT
-width of FASTA seq lines, 0 for no wrap (80)
-.TP
-\fB\-\-maxseqlength\fR INT
-maximum sequence length (50000)
-.TP
-\fB\-\-minseqlength\fR INT
-min seq length (clust/derep/search: 32, other:1)
-.TP
-\fB\-\-notrunclabels\fR
-do not truncate labels at first space
-.TP
-\fB\-\-threads\fR INT
-number of threads to use, zero for all cores (0)
-.PP
-Alignment options (most searching options also apply)
-.TP
-\fB\-\-allpairs_global\fR FILENAME
-perform global alignment of all sequence pairs
-.TP
-\fB\-\-alnout\fR FILENAME
-filename for human\-readable alignment output
-.TP
-\fB\-\-acceptall\fR
-output all pairwise alignments
-.PP
-Chimera detection options
-.TP
-\fB\-\-abskew\fR REAL
-min abundance ratio of parent vs chimera (2.0)
-.TP
-\fB\-\-alignwidth\fR INT
-width of alignment in uchimealn output (80)
-.TP
-\fB\-\-chimeras\fR FILENAME
-output chimeric sequences to file
-.TP
-\fB\-\-db\fR FILENAME
-reference database for \fB\-\-uchime_ref\fR
-.TP
-\fB\-\-dn\fR REAL
-\&'no' vote pseudo\-count (1.4)
-.TP
-\fB\-\-mindiffs\fR INT
-minimum number of differences in segment (3)
-.TP
-\fB\-\-mindiv\fR REAL
-minimum divergence from closest parent (0.8)
-.TP
-\fB\-\-minh\fR REAL
-minimum score (0.28)
-.TP
-\fB\-\-nonchimeras\fR FILENAME
-output non\-chimeric sequences to file
-.TP
-\fB\-\-self\fR
-exclude identical labels for \fB\-\-uchime_ref\fR
-.TP
-\fB\-\-selfid\fR
-exclude identical sequences for \fB\-\-uchime_ref\fR
-.TP
-\fB\-\-uchime_denovo\fR FILENAME
-detect chimeras de novo
-.TP
-\fB\-\-uchime_ref\fR FILENAME
-detect chimeras using a reference database
-.TP
-\fB\-\-uchimealns\fR FILENAME
-output chimera alignments to file
-.TP
-\fB\-\-uchimeout\fR FILENAME
-output to chimera info to tab\-separated file
-.TP
-\fB\-\-uchimeout5\fR
-make output compatible with uchime version 5
-.TP
-\fB\-\-xn\fR REAL
-\&'no' vote weight (8.0)
-.PP
-Clustering options (most searching options also apply)
-.TP
-\fB\-\-centroids\fR FILENAME
-output centroid sequences to FASTA file
-.TP
-\fB\-\-cluster_fast\fR FILENAME
-cluster sequences fast
-.HP
-\fB\-\-cluster_smallmem\fR FILENAME cluster sequences using a small amount of memory
-.TP
-\fB\-\-clusters\fR STRING
-output each cluster to a separate FASTA file
-.TP
-\fB\-\-consout\fR FILENAME
-output cluster consensus sequences to FASTA file
-.TP
-\fB\-\-cons_truncate\fR
-do not ignore terminal gaps in MSA for consensus
-.TP
-\fB\-\-id\fR REAL
-reject if identity lower
-.TP
-\fB\-\-iddef\fR INT
-id definition, 0\-4=CD\-HIT,all,int,MBL,BLAST (2)
-.TP
-\fB\-\-msaout\fR FILENAME
-output multiple seq. alignments to FASTA file
-.TP
-\fB\-\-qmask\fR none|dust|soft
-mask seqs with dust, soft or no method (dust)
-.TP
-\fB\-\-sizein\fR
-read abundance annotation from input
-.TP
-\fB\-\-sizeout\fR
-write cluster abundances to centroid file
-.TP
-\fB\-\-strand\fR plus|both
-cluster using plus or both strands (plus)
-.TP
-\fB\-\-uc\fR FILENAME
-filename for UCLUST\-like output
-.TP
-\fB\-\-usersort\fR
-indicate that input sequences are presorted
-.PP
-Dereplication options
-.HP
-\fB\-\-derep_fulllength\fR FILENAME dereplicate sequences in the given FASTA file
-.TP
-\fB\-\-maxuniquesize\fR INT
-maximum abundance for output from dereplication
-.TP
-\fB\-\-minuniquesize\fR INT
-minimum abundance for output from dereplication
-.TP
-\fB\-\-output\fR FILENAME
-output FASTA file
-.TP
-\fB\-\-sizein\fR
-read abundance annotation from input
-.TP
-\fB\-\-sizeout\fR
-write abundance annotation to output
-.TP
-\fB\-\-strand\fR plus|both
-dereplicate plus or both strands (plus)
-.TP
-\fB\-\-topn\fR INT
-output just the n most abundant sequences
-.TP
-\fB\-\-uc\fR FILENAME
-filename for UCLUST\-like output
-.PP
-Masking options
-.TP
-\fB\-\-hardmask\fR
-mask by replacing with N instead of lower case
-.TP
-\fB\-\-maskfasta\fR FILENAME
-mask sequences in the given FASTA file
-.TP
-\fB\-\-output\fR FILENAME
-output to specified FASTA file
-.TP
-\fB\-\-qmask\fR none|dust|soft
-mask seqs with dust, soft or no method (dust)
-.PP
-Searching options
-.TP
-\fB\-\-alnout\fR FILENAME
-filename for human\-readable alignment output
-.TP
-\fB\-\-blast6out\fR FILENAME
-filename for blast\-like tab\-separated output
-.TP
-\fB\-\-db\fR FILENAME
-filename for FASTA formatted database for search
-.TP
-\fB\-\-dbmask\fR none|dust|soft
-mask db with dust, soft or no method (dust)
-.TP
-\fB\-\-dbmatched\fR FILENAME
-FASTA file for matching database sequences
-.TP
-\fB\-\-dbnotmatched\fR FILENAME
-FASTA file for non\-matching database sequences
-.TP
-\fB\-\-fastapairs\fR FILENAME
-FASTA file with pairs of query and target
-.TP
-\fB\-\-fulldp\fR
-full dynamic programming alignment (always on)
-.TP
-\fB\-\-gapext\fR STRING
-penalties for gap extension (2I/1E)
-.TP
-\fB\-\-gapopen\fR STRING
-penalties for gap opening (20I/2E)
-.TP
-\fB\-\-hardmask\fR
-mask by replacing with N instead of lower case
-.TP
-\fB\-\-id\fR REAL
-reject if identity lower
-.TP
-\fB\-\-iddef\fR INT
-id definition, 0\-4=CD\-HIT,all,int,MBL,BLAST (2)
-.TP
-\fB\-\-idprefix\fR INT
-reject if first n nucleotides do not match
-.TP
-\fB\-\-idsuffix\fR INT
-reject if last n nucleotides do not match
-.TP
-\fB\-\-leftjust\fR
-reject if terminal gaps at alignment left end
-.TP
-\fB\-\-match\fR INT
-score for match (2)
-.TP
-\fB\-\-matched\fR FILENAME
-FASTA file for matching query sequences
-.TP
-\fB\-\-maxaccepts\fR INT
-number of hits to accept and show per strand (1)
-.TP
-\fB\-\-maxdiffs\fR INT
-reject if more substitutions or indels
-.TP
-\fB\-\-maxgaps\fR INT
-reject if more indels
-.TP
-\fB\-\-maxhits\fR INT
-maximum number of hits to show (unlimited)
-.TP
-\fB\-\-maxid\fR REAL
-reject if identity higher
-.TP
-\fB\-\-maxqsize\fR INT
-reject if query abundance larger
-.TP
-\fB\-\-maxqt\fR REAL
-reject if query/target length ratio higher
-.TP
-\fB\-\-maxrejects\fR INT
-number of non\-matching hits to consider (32)
-.TP
-\fB\-\-maxsizeratio\fR REAL
-reject if query/target abundance ratio higher
-.TP
-\fB\-\-maxsl\fR REAL
-reject if shorter/longer length ratio higher
-.TP
-\fB\-\-maxsubs\fR INT
-reject if more substitutions
-.TP
-\fB\-\-mid\fR REAL
-reject if percent identity lower, ignoring gaps
-.TP
-\fB\-\-mincols\fR INT
-reject if alignment length shorter
-.TP
-\fB\-\-minqt\fR REAL
-reject if query/target length ratio lower
-.TP
-\fB\-\-minsizeratio\fR REAL
-reject if query/target abundance ratio lower
-.TP
-\fB\-\-minsl\fR REAL
-reject if shorter/longer length ratio lower
-.TP
-\fB\-\-mintsize\fR INT
-reject if target abundance lower
-.TP
-\fB\-\-mismatch\fR INT
-score for mismatch (\fB\-4\fR)
-.TP
-\fB\-\-notmatched\fR FILENAME
-FASTA file for non\-matching query sequences
-.TP
-\fB\-\-output_no_hits\fR
-output non\-matching queries to output files
-.TP
-\fB\-\-qmask\fR none|dust|soft
-mask query with dust, soft or no method (dust)
-.TP
-\fB\-\-query_cov\fR REAL
-reject if fraction of query seq. aligned lower
-.TP
-\fB\-\-rightjust\fR
-reject if terminal gaps at alignment right end
-.TP
-\fB\-\-rowlen\fR INT
-width of alignment lines in alnout output (64)
-.TP
-\fB\-\-self\fR
-reject if labels identical
-.TP
-\fB\-\-selfid\fR
-reject if sequences identical
-.TP
-\fB\-\-sizeout\fR
-write abundance annotation to output
-.TP
-\fB\-\-strand\fR plus|both
-search plus or both strands (plus)
-.TP
-\fB\-\-target_cov\fR REAL
-reject if fraction of target seq. aligned lower
-.TP
-\fB\-\-top_hits_only\fR
-output only hits with identity equal to the best
-.TP
-\fB\-\-uc\fR FILENAME
-filename for UCLUST\-like output
-.TP
-\fB\-\-uc_allhits\fR
-show all, not just top hit with uc output
-.TP
-\fB\-\-usearch_global\fR FILENAME
-filename of queries for global alignment search
-.TP
-\fB\-\-userfields\fR STRING
-fields to output in userout file
-.TP
-\fB\-\-userout\fR FILENAME
-filename for user\-defined tab\-separated output
-.TP
-\fB\-\-weak_id\fR REAL
-include aligned hits with >= id; continue search
-.TP
-\fB\-\-wordlength\fR INT
-length of words for database index 3\-15 (8)
-.PP
-Shuffling options
-.TP
-\fB\-\-output\fR FILENAME
-output to specified FASTA file
-.TP
-\fB\-\-seed\fR INT
-seed for PRNG, zero to use random data source (0)
-.TP
-\fB\-\-shuffle\fR FILENAME
-shuffle order of sequences pseudo\-randomly
-.TP
-\fB\-\-topn\fR INT
-output just first n sequences
-.PP
-Sorting options
-.TP
-\fB\-\-maxsize\fR INT
-maximum abundance for sortbysize
-.TP
-\fB\-\-minsize\fR INT
-minimum abundance for sortbysize
-.TP
-\fB\-\-output\fR FILENAME
-output FASTA file
-.TP
-\fB\-\-relabel\fR STRING
-relabel with this prefix string after sorting
-.TP
-\fB\-\-sizeout\fR
-add abundance annotation to output
-.TP
-\fB\-\-sortbylength\fR FILENAME
-sort sequences by length in given FASTA file
-.TP
-\fB\-\-sortbysize\fR FILENAME
-abundance sort sequences in given FASTA file
-.TP
-\fB\-\-topn\fR INT
-output just top n seqs after sorting

Modified: trunk/packages/vsearch/trunk/debian/vsearch.docs
===================================================================
--- trunk/packages/vsearch/trunk/debian/vsearch.docs	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/vsearch.docs	2015-01-20 11:05:45 UTC (rev 18596)
@@ -1 +1,2 @@
 *.html
+doc/*.pdf

Modified: trunk/packages/vsearch/trunk/debian/vsearch.manpages
===================================================================
--- trunk/packages/vsearch/trunk/debian/vsearch.manpages	2015-01-19 21:36:02 UTC (rev 18595)
+++ trunk/packages/vsearch/trunk/debian/vsearch.manpages	2015-01-20 11:05:45 UTC (rev 18596)
@@ -1 +1 @@
-debian/*.1
+doc/*.1