[med-svn] [SCM] grinder branch, master, updated. upstream/0.4.5-3-g73484a3

Charles Plessy plessy at debian.org
Thu May 3 08:00:34 UTC 2012


The following commit has been merged in the master branch:
commit b9f442f48c431144b9e35ba943321067768a22a2
Author: Charles Plessy <plessy at debian.org>
Date:   Thu May 3 16:52:48 2012 +0900

    Revert "Imported Upstream version 0.4.5"
    
    This reverts commit fe74afe9efbc6ce125dd71bb92a0c577e36f6064.

diff --git a/CHANGES b/CHANGES
index 1cb7d7a..43bf661 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,18 +1,9 @@
 Revision history for Grinder
 
-0.4.5   27-Jan-2012   
-        Fixed bug when adding mutations linearly to a 1 bp read (reported by
-          Robert Schmieder).
-        Better handling of 0 bp reference sequences.
-        Fixed bug when looking for amplicons on the reverse complement of a
-          reference sequence.
-        Properly remove the shortest of two amplicons, even if they are on
-          different strands.
-
-0.4.4   20-Jan-2012
+0.4.4   20-Jan-2011
         Dependencies update: no need for Math::Random::MT::Perl anymore.
 
-0.4.3   18-Jan-2012
+0.4.3   18-Jan-2011
         Implemented multimeras, i.e. chimeras from more than two reference
           sequences (suggested by anonymous reviewer). See <chimera_dist>.
         Implemented chimeras where the breakpoints correspond to k-mers shared
@@ -46,7 +37,7 @@ Revision history for Grinder
         Speed improvement for position-specific error models
         Galaxy GUI fix so that the output is fastqsanger, not just fastq
         The reference_file parameter is now a required argument, so that running
-          grinder without arguments displays the help (reported by Robert Schmieder)
+          grinder without arguments displays the help (reported by Rob Schmieder)
         Fixed a bug that caused a crash when using an indel model and a homopolymer
           model simultaneously (reported by Robert Schmieder)
         Information displayed on screen now reports whether the library is a
diff --git a/MANIFEST b/MANIFEST
index bb6268e..0af259b 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -25,17 +25,12 @@ lib/Grinder.pm
 lib/Grinder/KmerCollection.pm
 LICENSE
 Makefile.PL
-man/average_genome_size.1
-man/change_paired_read_orientation.1
-man/grinder.1
 MANIFEST			This list of files
 META.yml
-MYMETA.json
 MYMETA.yml
 README
 README.htm
 script/grinder
-script/grinder.pod
 t/00-load.t
 t/01-shotgun.t
 t/02-mates.t
@@ -91,7 +86,6 @@ t/data/revcom_amplicon_database.fa
 t/data/reverse_forward_primers.fa
 t/data/reverse_primer.fa
 t/data/shotgun_database.fa
-t/data/shotgun_database_extended.fa
 t/data/single_seq_database.fa
 t/pod.t
 t/TestUtils.pm
diff --git a/META.yml b/META.yml
index 1c9c288..58d880a 100644
--- a/META.yml
+++ b/META.yml
@@ -3,10 +3,10 @@ abstract: 'A versatile omics shotgun and amplicon sequencing read simulator'
 author:
   - 'Florent Angly <florent.angly at gmail.com>'
 build_requires:
-  ExtUtils::MakeMaker: 6.62
+  ExtUtils::MakeMaker: 6.57
   Test::More: 0
 configure_requires:
-  ExtUtils::MakeMaker: 6.62
+  ExtUtils::MakeMaker: 6.57
 distribution_type: module
 dynamic_config: 1
 generated_by: 'Module::Install version 1.04'
@@ -21,7 +21,7 @@ no_index:
     - t
 requires:
   Bio::SeqIO: 0
-  Getopt::Euclid: 0.3.4
+  Getopt::Euclid: 0.2.8
   Math::Random::MT: 1.13
   perl: 5.6.0
 resources:
@@ -29,4 +29,4 @@ resources:
   homepage: http://sourceforge.net/projects/biogrinder/
   license: http://opensource.org/licenses/gpl-3.0.html
   repository: git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-version: 0.004005
+version: 0.4.4
diff --git a/MYMETA.json b/MYMETA.json
deleted file mode 100644
index 4861b78..0000000
--- a/MYMETA.json
+++ /dev/null
@@ -1,57 +0,0 @@
-{
-   "abstract" : "A versatile omics shotgun and amplicon sequencing read simulator",
-   "author" : [
-      "Florent Angly <florent.angly at gmail.com>"
-   ],
-   "dynamic_config" : 0,
-   "generated_by" : "Module::Install version 1.04, CPAN::Meta::Converter version 2.113640",
-   "license" : [
-      "unknown"
-   ],
-   "meta-spec" : {
-      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
-      "version" : "2"
-   },
-   "name" : "Grinder",
-   "no_index" : {
-      "directory" : [
-         "inc",
-         "t"
-      ]
-   },
-   "prereqs" : {
-      "build" : {
-         "requires" : {
-            "ExtUtils::MakeMaker" : "6.62",
-            "Test::More" : 0
-         }
-      },
-      "configure" : {
-         "requires" : {
-            "ExtUtils::MakeMaker" : "6.62"
-         }
-      },
-      "runtime" : {
-         "requires" : {
-            "Bio::SeqIO" : 0,
-            "Getopt::Euclid" : "v0.3.4",
-            "Math::Random::MT" : "1.13",
-            "perl" : "5.006"
-         }
-      }
-   },
-   "release_status" : "testing",
-   "resources" : {
-      "bugtracker" : {
-         "web" : "http://sourceforge.net/tracker/?group_id=244196&atid=1124737"
-      },
-      "homepage" : "http://sourceforge.net/projects/biogrinder/",
-      "license" : [
-         "http://opensource.org/licenses/gpl-3.0.html"
-      ],
-      "repository" : {
-         "url" : "git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder"
-      }
-   },
-   "version" : "0.4.5_1"
-}
diff --git a/MYMETA.yml b/MYMETA.yml
index 2302d84..9e94c31 100644
--- a/MYMETA.yml
+++ b/MYMETA.yml
@@ -3,29 +3,25 @@ abstract: 'A versatile omics shotgun and amplicon sequencing read simulator'
 author:
   - 'Florent Angly <florent.angly at gmail.com>'
 build_requires:
-  ExtUtils::MakeMaker: 6.62
+  ExtUtils::MakeMaker: 6.57
   Test::More: 0
 configure_requires:
-  ExtUtils::MakeMaker: 6.62
+  ExtUtils::MakeMaker: 0
+distribution_type: module
 dynamic_config: 0
-generated_by: 'Module::Install version 1.04, CPAN::Meta::Converter version 2.113640'
-license: unknown
+generated_by: 'ExtUtils::MakeMaker version 6.57_05'
+license: gpl3
 meta-spec:
   url: http://module-build.sourceforge.net/META-spec-v1.4.html
   version: 1.4
 name: Grinder
 no_index:
   directory:
-    - inc
     - t
+    - inc
 requires:
   Bio::SeqIO: 0
-  Getopt::Euclid: v0.3.4
+  Getopt::Euclid: 0.2.8
   Math::Random::MT: 1.13
   perl: 5.006
-resources:
-  bugtracker: http://sourceforge.net/tracker/?group_id=244196&atid=1124737
-  homepage: http://sourceforge.net/projects/biogrinder/
-  license: http://opensource.org/licenses/gpl-3.0.html
-  repository: git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-version: 0.4.5_1
+version: 0.4.4
diff --git a/Makefile.PL b/Makefile.PL
index b36b0b6..ae6787e 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -5,7 +5,7 @@ use inc::Module::Install;
 
 name     'Grinder';
 all_from 'lib/Grinder.pm';
-license  'gpl3'; # Module::Install 1.04 does not parse the GPL version number
+license  'gpl3'; # As of version 1.01, Module::Install does not parse the GPL version number
 
 resources
    homepage   'http://sourceforge.net/projects/biogrinder/';
@@ -16,10 +16,10 @@ resources
 # Dependencies for everyone
 
 build_requires  'Test::More';
-requires        'Getopt::Euclid'          => '0.3.4';
+
+requires        'Getopt::Euclid'          => '0.2.8';
 requires        'Math::Random::MT'        => '1.13';
 requires        'Bio::SeqIO'              =>  0;
-#requires       'Bio::Root::Root'         =>  0;
 #requires       'Bio::Seq::SimulatedRead' =>  0; # required but packaged here since it is so recent
 
 
@@ -28,7 +28,7 @@ requires        'Bio::SeqIO'              =>  0;
 author_requires 'Module::Install';
 author_requires 'Module::Install::AuthorRequires';
 author_requires 'Module::Install::AutoLicense';
-author_requires 'Module::Install::ReadmeFromPod' => 0.14;
+author_requires 'Module::Install::ReadmeFromPod';
 author_requires 'Module::Install::AutoManifest';
 author_requires 'Statistics::R'  => '0.21';
 
@@ -67,32 +67,37 @@ WriteAll;
 auto_license();
 
 
-# Generate the README and manpage files from the POD docs
+# Generate the README files from the module's POD
 
-auto_doc();
+auto_readme();
 
 
 
 #--------- UTILS --------------------------------------------------------------#
 
 
-sub auto_doc {
-   print "*** Building doc...\n";
-   # Generate script/grinder.pod
-   my @args = ($^X, '-Ilib', 'script/grinder', '--podfile');
-   system(@args) == 0 or die "system @args failed: $?";
-   my $grinder = 'script/grinder.pod';
-   my $script1 = 'utils/average_genome_size';
-   my $script2 = 'utils/change_paired_read_orientation';
-   my $man_dir = 'man';
-   if (not -d $man_dir) {
-      mkdir $man_dir or die "Could not write folder $man_dir:\n$!\n";
+sub auto_readme {
+   #print "*** Building README files...\n";
+   my $source = 'lib/Grinder.pm';
+   readme_from $source;
+   readme_html_from( $source, 'README.htm' );
+   return 1;
+}
+
+sub readme_html_from {
+   my ($in, $out) = @_;
+   `pod2html --infile=$in --outfile=$out`;
+   warn "Warning: Could not generate $out.\n$!\n" if $? == -1;
+   rm_files(['pod2htmd.tmp', 'pod2htmi.tmp']);
+   return $?; # exit status
+}
+
+sub rm_files {
+   my ($files) = @_;
+   for my $file (@$files) {
+      if (-e $file) {
+         unlink $file or warn "Warning: Could not remove file '$file'.\n$!\n";
+      }
    }
-   my $clean   = 1;
-   readme_from $grinder, $clean, 'txt', 'README';
-   readme_from $grinder, $clean, 'htm', 'README.htm';
-   readme_from $grinder, $clean, 'man', "$man_dir/grinder.1";
-   readme_from $script1, $clean, 'man', "$man_dir/average_genome_size.1";
-   readme_from $script2, $clean, 'man', "$man_dir/change_paired_read_orientation.1";
    return 1;
 }
diff --git a/README b/README
index d69c617..d0ee1da 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
 NAME
-    grinder - A versatile omics shotgun and amplicon sequencing read
+    Grinder - A versatile omics shotgun and amplicon sequencing read
     simulator
 
 DESCRIPTION
@@ -90,7 +90,7 @@ CITATION
     Available from <http://dx.doi.org/10.1371/journal.pcbi.1000593>.
 
 VERSION
-    This document refers to grinder version 0.4.5
+    0.4.4
 
 AUTHOR
     Florent Angly <florent.angly at gmail.com>
@@ -112,10 +112,14 @@ INSTALLATION
     The following CPAN Perl modules are dependencies that will be installed
     automatically for you:
 
-    *   Bio::SeqIO, Bio::Root::Root, Bio::Seq::SimulatedRead
+    *   Bio::SeqIO
 
-        Part of the Bioperl package. Bio::Seq::SimulatedReads has not not
-        been released yet and is therefore included here.
+        Part of the Bioperl package
+
+    *   Bio::Seq::SimulatedRead
+
+        Part of Bioperl but included here because it has not been released
+        yet
 
     *   Getopt::Euclid (>= 0.2.8)
 
@@ -277,37 +281,33 @@ CLI EXAMPLES
 
     12. A transcriptomic dataset
 
-           grinder -reference_file transcripts.fna
-
-    13. A unidirectional transcriptomic dataset
-
            grinder -reference_file transcripts.fna -unidirectional 1
 
         Note the use of -unidirectional 1 to prevent reads to be taken from
         the reverse- complement of the reference sequences.
 
-    14. A proteomic dataset
+    13. A proteomic dataset
 
            grinder -reference_file proteins.faa -unidirectional 1
 
-    15. A 16S rRNA amplicon library
+    14. A 16S rRNA amplicon library
 
            grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1
 
         Note the use of -length_bias 0 because reference sequence length
         should not affect the relative abundance of amplicons.
 
-    16. The same amplicon library with 20% of chimeric reads (90% bimera,
+    15. The same amplicon library with 20% of chimeric reads (90% bimera,
         10% trimera)
 
            grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1 -chimera_perc 20 -chimera_dist 90 10
 
-    17. Three 16S rRNA amplicon libraries with specified MIDs and no
+    16. Three 16S rRNA amplicon libraries with specified MIDs and no
         reference sequences in common
 
            grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1 -num_libraries 3 -multiplex_ids MIDs.fna
 
-    18. Reading reference sequences from the standard input, which allows
+    17. Reading reference sequences from the standard input, which allows
         you to decompress FASTA files on the fly:
 
            zcat microbial_db.fna.gz | grinder -reference_file - -total_reads 100
@@ -318,18 +318,24 @@ CLI REQUIRED ARGUMENTS
         FASTA file that contains the input reference sequences (full
         genomes, 16S rRNA genes, transcripts, proteins...) or '-' to read
         them from the standard input. See the README file for examples of
-        databases you can use and where to get them from. Default: -
+        databases you can use and where to get them from. Default:
+        reference_file.default
 
 CLI OPTIONAL ARGUMENTS
+    Basic parameters
+
     -tr <total_reads> | -total_reads <total_reads>
         Number of shotgun or amplicon reads to generate for each library. Do
-        not specify this if you specify the fold coverage. Default: 100
+        not specify this if you specify the fold coverage. Default:
+        total_reads.default
 
     -cf <coverage_fold> | -coverage_fold <coverage_fold>
         Desired fold coverage of the input reference sequences (the output
         FASTA length divided by the input FASTA length). Do not specify this
         if you specify the number of reads directly.
 
+    Advanced shotgun and amplicon parameters
+
     -rd <read_dist>... | -read_dist <read_dist>...
         Desired shotgun or amplicon read length distribution specified as:
         average length, distribution ('uniform' or 'normal') and standard
@@ -345,7 +351,7 @@ CLI OPTIONAL ARGUMENTS
             bp (454 GS-FLX Ti): 450 normal 50
 
         Reference sequences smaller than the specified read length are not
-        used. Default: 100
+        used. Default: read_dist.default
 
     -id <insert_dist>... | -insert_dist <insert_dist>...
         Create paired-end or mate-pair reads spanning the given insert
@@ -354,7 +360,7 @@ CLI OPTIONAL ARGUMENTS
         of DNA between them: 0 : off, or: insert size distribution in bp, in
         the same format as the read length distribution (a typical value is
         2,500 bp) Two distinct reads are generated whether or not the mate
-        pair overlaps. Default: 0
+        pair overlaps. Default: insert_dist.default
 
     -mo <mate_orientation> | -mate_orientation <mate_orientation>
         When generating paired-end or mate-pair reads (see <insert_dist>),
@@ -365,17 +371,17 @@ CLI OPTIONAL ARGUMENTS
            RF:  <--- --->  e.g. Illumina mate-pairs
            RR:  <--- <---
 
-        Default: FR
+        Default: mate_orientation.default
 
     -ec <exclude_chars> | -exclude_chars <exclude_chars>
         Do not create reads containing any of the specified characters (case
         insensitive), e.g. 'N-' to prevent reads with gaps (-) or
-        ambiguities (N). Default: ''
+        ambiguities (N). Default: 'exclude_chars.default'
 
     -dc <delete_chars> | -delete_chars <delete_chars>
         Remove the specified characters from the reference sequences
         (case-insensitive), e.g. 'N-' to renove gaps (-) and ambiguities
-        (N). Default:
+        (N). Default: delete_chars.default
 
     -fr <forward_reverse> | -forward_reverse <forward_reverse>
         Use DNA amplicon sequencing using a forward and reverse PCR primer
@@ -399,21 +405,24 @@ CLI OPTIONAL ARGUMENTS
         strand and its reverse complement, proceed unidirectionally, from
         one strand only (forward or reverse). Values: 0 (off, i.e.
         bidirectional), 1 (forward), -1 (reverse). Use <unidirectional> = 1
-        for strand-specific transcriptomic or proteomic datasets. Default: 0
+        for strand specific transcriptomic or proteomic datasets. Default:
+        unidirectional.default
 
     -lb <length_bias> | -length_bias <length_bias>
         In shotgun libraries, sample reference sequences proportionally to
         their length. For example, in simulated microbial datasets, this
         means that at the same relative abundance, larger genomes contribute
         more reads than smaller genomes (and all genomes have the same fold
-        coverage). 0 = no, 1 = yes. Default: 1
+        coverage). 0 = no, 1 = yes. Default: length_bias.default
 
     -cb <copy_bias> | -copy_bias <copy_bias>
         In amplicon libraries where full genomes are used as input, sample
         species proportionally to the number of copies of the target gene:
         at equal relative abundance, genomes that have multiple copies of
         the target gene contribute more amplicon reads than genomes that
-        have a single copy. 0 = no, 1 = yes. Default: 1
+        have a single copy. 0 = no, 1 = yes. Default: copy_bias.default
+
+    Aberrations and sequencing errors
 
     -md <mutation_dist>... | -mutation_dist <mutation_dist>...
         Introduce sequencing errors in the reads, under the form of
@@ -426,14 +435,14 @@ CLI OPTIONAL ARGUMENTS
         using the 4th degree polynome 3e-3 + 3.3e-8 * i^4 (Korbel et al
         2009), use: poly4 3e-3 3.3e-8. Use the <mutation_ratio> option to
         alter how many of these mutations are substitutions or indels.
-        Default: uniform 0 0
+        Default: mutation_dist.default
 
     -mr <mutation_ratio>... | -mutation_ratio <mutation_ratio>...
         Indicate the percentage of substitutions and the number of indels
         (insertions and deletions). For example, use '80 20' (4
         substitutions for each indel) for Sanger reads. Note that this
         parameter has no effect unless you specify the <mutation_dist>
-        option. Default: 80 20
+        option. Default: mutation_ratio.default
 
     -hd <homopolymer_dist> | -homopolymer_dist <homopolymer_dist>
         Introduce sequencing errors in the reads under the form of
@@ -445,13 +454,14 @@ CLI OPTIONAL ARGUMENTS
           Richter  : N(n, 0.15 * sqrt(n))        ,  Richter et al. 2008.
           Balzer   : N(n, 0.03494 + n * 0.06856) ,  Balzer et al. 2010.
 
-        Default: 0
+        Default: homopolymer_dist.default
 
     -cp <chimera_perc> | -chimera_perc <chimera_perc>
         Specify the percent of reads in amplicon libraries that should be
         chimeric sequences. The 'reference' field in the description of
         chimeric reads will contain the ID of all the reference sequences
-        forming the chimeric template. A typical value is 10%. Default: 0 %
+        forming the chimeric template. A typical value is 10%. Default:
+        chimera_perc.default %
 
     -cd <chimera_dist>... | -chimera_dist <chimera_dist>...
         Specify the distribution of chimeras: bimeras, trimeras, quadrameras
@@ -459,7 +469,7 @@ CLI OPTIONAL ARGUMENTS
         from Quince et al. 2011: '314 38 1', which corresponds to 89% of
         bimeras, 11% of trimeras and 0.3% of quadrameras. Note that this
         option only takes effect when you request the generation of chimeras
-        with the <chimera_perc> option. Default: 314 38 1
+        with the <chimera_perc> option. Default: chimera_dist.default
 
     -ck <chimera_kmer> | -chimera_kmer <chimera_kmer>
         Activate a method to form chimeras by picking breakpoints at places
@@ -476,7 +486,10 @@ CLI OPTIONAL ARGUMENTS
         effect when you request the generation of chimeras with the
         <chimera_perc> option. Also, this options is quite memory intensive,
         so you should probably limit yourself to a relatively small number
-        of reference sequences if you want to use it. Default: 10 bp
+        of reference sequences if you want to use it. Default:
+        chimera_kmer.default bp
+
+    Community structure and diversity
 
     -af <abundance_file> | -abundance_file <abundance_file>
         Specify the relative abundance of the reference sequences manually
@@ -495,13 +508,13 @@ CLI OPTIONAL ARGUMENTS
           powerlaw distribution with parameter 0.1: powerlaw 0.1
           exponential distribution with automatically chosen parameter: exponential
 
-        Default: uniform 1
+        Default: abundance_model.default
 
     -nl <num_libraries> | -num_libraries <num_libraries>
         Number of independent libraries to create. Specify how diverse and
         similar they should be with <diversity>, <shared_perc> and
         <permuted_perc>. Assign them different MID tags with
-        <multiplex_mids>. Default: 1
+        <multiplex_mids>. Default: num_libraries.default
 
     -mi <multiplex_ids> | -multiplex_ids <multiplex_ids>
         Specify an optional FASTA file that contains multiplex sequence
@@ -514,42 +527,45 @@ CLI OPTIONAL ARGUMENTS
         libraries. Use 0 for the maximum diversity possible (based on the
         number of reference sequences available). Provide one value to make
         all libraries have the same diversity, or one diversity value per
-        library otherwise. Default: 0
+        library otherwise. Default: diversity.default
 
     -sp <shared_perc> | -shared_perc <shared_perc>
         For multiple libraries, percent of reference sequences they should
         have in common (relative to the diversity of the least diverse
-        library). Default: 0 %
+        library). Default: shared_perc.default %
 
     -pp <permuted_perc> | -permuted_perc <permuted_perc>
         For multiple libraries, percent of the most-abundant reference
-        sequences to permute in rank-abundance. Default: 0 %
+        sequences to permute in rank-abundance. Default:
+        permuted_perc.default %
+
+    Miscellaneous
 
     -rs <random_seed> | -random_seed <random_seed>
         Seed number to use for the pseudo-random number generator.
 
     -dt <desc_track> | -desc_track <desc_track>
         Track read information (reference sequence, position, errors, ...)
-        by writing it in the read description. Default: 1
+        by writing it in the read description. Default: desc_track.default
 
     -ql <qual_levels>... | -qual_levels <qual_levels>...
         Generate basic quality scores for the simulated reads. Good residues
         are given a specified good score (e.g. 30) and residues that are the
         result of an insertion or substitution are given a specified bad
         score (e.g. 10). Specify first the good score and then the bad score
-        on the command-line, e.g.: 30 10. Default:
+        on the command-line, e.g.: 30 10. Default: qual_levels.default
 
     -fq <fastq_output> | -fastq_output <fastq_output>
         Write the generated reads in FASTQ format (Sanger variant) instead
         of FASTA and QUAL. <qual_levels> need to be specified for this
-        option to be effective. Default: 0
+        option to be effective. Default: fastq_output.default
 
     -bn <base_name> | -base_name <base_name>
-        Prefix of the output files. Default: grinder
+        Prefix of the output files. Default: base_name.default
 
     -od <output_dir> | -output_dir <output_dir>
         Directory where the results should be written. This folder will be
-        created if needed. Default: .
+        created if needed. Default: output_dir.default
 
     -pf <profile_file> | -profile_file <profile_file>
         A file that contains Grinder arguments. This is useful if you use
@@ -693,7 +709,7 @@ API METHODS
     Returns : seed number
 
 COPYRIGHT
-    Copyright 2009-2012 Florent ANGLY <florent.angly at gmail.com>
+    Copyright 2009,2010,2011 Florent ANGLY <florent.angly at gmail.com>
 
     Grinder is free software: you can redistribute it and/or modify it under
     the terms of the GNU General Public License (GPL) as published by the
diff --git a/README.htm b/README.htm
index ac5563f..dfdb4d9 100644
--- a/README.htm
+++ b/README.htm
@@ -2,7 +2,7 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
-<title>grinder - A versatile omics shotgun and amplicon sequencing read simulator</title>
+<title>Grinder - A versatile omics shotgun and amplicon sequencing read simulator</title>
 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 <link rev="made" href="mailto:root at localhost" />
 </head>
@@ -56,7 +56,7 @@
 <p>
 </p>
 <h1><a name="name">NAME</a></h1>
-<p>grinder - A versatile omics shotgun and amplicon sequencing read simulator</p>
+<p>Grinder - A versatile omics shotgun and amplicon sequencing read simulator</p>
 <p>
 </p>
 <hr />
@@ -159,7 +159,7 @@ Available from L<<a href="http://dx.doi.org/10.1371/journal.pcbi.1000593&gt">
 </p>
 <hr />
 <h1><a name="version">VERSION</a></h1>
-<p>This document refers to grinder version 0.4.5</p>
+<p>0.4.4</p>
 <p>
 </p>
 <hr />
@@ -188,9 +188,12 @@ install the implementation of make of your choice, e.g. GNU make: <a href="http:
 for you:</p>
 <ul>
 <li>
-<p>Bio::SeqIO, Bio::Root::Root, Bio::Seq::SimulatedRead</p>
-<p>Part of the Bioperl package. Bio::Seq::SimulatedReads has not not been released
-yet and is therefore included here.</p>
+<p>Bio::SeqIO</p>
+<p>Part of the Bioperl package</p>
+</li>
+<li>
+<p>Bio::Seq::SimulatedRead</p>
+<p>Part of Bioperl but included here because it has not been released yet</p>
 </li>
 <li>
 <p>Getopt::Euclid (>= 0.2.8)</p>
@@ -352,11 +355,6 @@ around 2.5 kbp and has 0.2 kbp standard deviation</p>
 <li>
 <p>A transcriptomic dataset</p>
 <pre>
-   grinder -reference_file transcripts.fna</pre>
-</li>
-<li>
-<p>A unidirectional transcriptomic dataset</p>
-<pre>
    grinder -reference_file transcripts.fna -unidirectional 1</pre>
 <p>Note the use of -unidirectional 1 to prevent reads to be taken from the reverse-
 complement of the reference sequences.</p>
@@ -402,19 +400,20 @@ decompress FASTA files on the fly:</p>
 <p>FASTA file that contains the input reference sequences (full genomes, 16S rRNA
 genes, transcripts, proteins...) or '-' to read them from the standard input. See the
 README file for examples of databases you can use and where to get them from. 
-Default: -</p>
+Default: reference_file.default</p>
 </dd>
 </dl>
 <p>
 </p>
 <hr />
 <h1><a name="cli_optional_arguments">CLI OPTIONAL ARGUMENTS</a></h1>
+<p>Basic parameters</p>
 <dl>
 <dt><strong><a name="tr_total_reads_total_reads_total_reads" class="item">-tr <total_reads> | -total_reads <total_reads></a></strong></dt>
 
 <dd>
 <p>Number of shotgun or amplicon reads to generate for each library. Do not specify
-this if you specify the fold coverage. Default: 100</p>
+this if you specify the fold coverage. Default: total_reads.default</p>
 </dd>
 <dt><strong><a name="cf_coverage_fold_coverage_fold_coverage_fold" class="item">-cf <coverage_fold> | -coverage_fold <coverage_fold></a></strong></dt>
 
@@ -423,6 +422,9 @@ this if you specify the fold coverage. Default: 100</p>
 divided by the input FASTA length). Do not specify this if you specify the number
 of reads directly.</p>
 </dd>
+</dl>
+<p>Advanced shotgun and amplicon parameters</p>
+<dl>
 <dt><strong><a name="rd_read_dist_read_dist_read_dist" class="item">-rd <read_dist>... | -read_dist <read_dist>...</a></strong></dt>
 
 <dd>
@@ -437,7 +439,7 @@ of reads directly.</p>
   Reads normally distributed with an average of 450 and a standard deviation of 50
     bp (454 GS-FLX Ti): 450 normal 50</pre>
 <p>Reference sequences smaller than the specified read length are not used. Default:
-100</p>
+read_dist.default</p>
 </dd>
 <dt><strong><a name="id_insert_dist_insert_dist_insert_dist" class="item">-id <insert_dist>... | -insert_dist <insert_dist>...</a></strong></dt>
 
@@ -449,7 +451,7 @@ the length of both reads and of the stretch of DNA between them:
    or: insert size distribution in bp, in the same format as the read length
        distribution (a typical value is 2,500 bp)
 Two distinct reads are generated whether or not the mate pair overlaps. Default:
-0</p>
+insert_dist.default</p>
 </dd>
 <dt><strong><a name="mo_mate_orientation_mate_orientation_mate_orientation" class="item">-mo <mate_orientation> | -mate_orientation <mate_orientation></a></strong></dt>
 
@@ -461,20 +463,20 @@ orientation of the reads (F: forward, R: reverse):</p>
    FF:  ---> --->  e.g. 454
    RF:  <--- --->  e.g. Illumina mate-pairs
    RR:  <--- <---</pre>
-<p>Default: FR</p>
+<p>Default: mate_orientation.default</p>
 </dd>
 <dt><strong><a name="ec_exclude_chars_exclude_chars_exclude_chars" class="item">-ec <exclude_chars> | -exclude_chars <exclude_chars></a></strong></dt>
 
 <dd>
 <p>Do not create reads containing any of the specified characters (case 
 insensitive), e.g. 'N-' to prevent reads with gaps (-) or ambiguities (N). 
-Default: ''</p>
+Default: 'exclude_chars.default'</p>
 </dd>
 <dt><strong><a name="dc_delete_chars_delete_chars_delete_chars" class="item">-dc <delete_chars> | -delete_chars <delete_chars></a></strong></dt>
 
 <dd>
 <p>Remove the specified characters from the reference sequences (case-insensitive),
-e.g. 'N-' to renove gaps (-) and ambiguities (N). Default:</p>
+e.g. 'N-' to renove gaps (-) and ambiguities (N). Default: delete_chars.default</p>
 </dd>
 <dt><strong><a name="fr_forward_reverse_forward_reverse_forward_reverse" class="item">-fr <forward_reverse> | -forward_reverse <forward_reverse></a></strong></dt>
 
@@ -498,8 +500,8 @@ and 1392R primers that target the V6 to V9 region of the 16S rRNA gene.</p>
 <p>Instead of producing reads bidirectionally, from the reference strand and its
 reverse complement, proceed unidirectionally, from one strand only (forward or
 reverse). Values: 0 (off, i.e. bidirectional), 1 (forward), -1 (reverse). Use
-<unidirectional> = 1 for strand-specific transcriptomic or proteomic datasets.
-Default: 0</p>
+<unidirectional> = 1 for strand specific transcriptomic or proteomic datasets.
+Default: unidirectional.default</p>
 </dd>
 <dt><strong><a name="lb_length_bias_length_bias_length_bias" class="item">-lb <length_bias> | -length_bias <length_bias></a></strong></dt>
 
@@ -508,7 +510,7 @@ Default: 0</p>
 For example, in simulated microbial datasets, this means that at the same
 relative abundance, larger genomes contribute more reads than smaller genomes
 (and all genomes have the same fold coverage).
-0 = no, 1 = yes. Default: 1</p>
+0 = no, 1 = yes. Default: length_bias.default</p>
 </dd>
 <dt><strong><a name="cb_copy_bias_copy_bias_copy_bias" class="item">-cb <copy_bias> | -copy_bias <copy_bias></a></strong></dt>
 
@@ -517,8 +519,11 @@ relative abundance, larger genomes contribute more reads than smaller genomes
 proportionally to the number of copies of the target gene: at equal relative
 abundance, genomes that have multiple copies of the target gene contribute more
 amplicon reads than genomes that have a single copy. 0 = no, 1 = yes. Default:
-1</p>
+copy_bias.default</p>
 </dd>
+</dl>
+<p>Aberrations and sequencing errors</p>
+<dl>
 <dt><strong><a name="md_mutation_dist_mutation_dist_mutation_dist" class="item">-md <mutation_dist>... | -mutation_dist <mutation_dist>...</a></strong></dt>
 
 <dd>
@@ -530,7 +535,7 @@ errors, use a linear model where the errror rate is 1% at the 5' end of reads an
 2% at the 3' end: linear 1 2. To model Illumina errors using the 4th degree
 polynome 3e-3 + 3.3e-8 * i^4 (Korbel et al 2009), use: poly4 3e-3 3.3e-8.
 Use the <mutation_ratio> option to alter how many of these mutations are
-substitutions or indels. Default: uniform 0 0</p>
+substitutions or indels. Default: mutation_dist.default</p>
 </dd>
 <dt><strong><a name="mr_mutation_ratio_mutation_ratio_mutation_ratio" class="item">-mr <mutation_ratio>... | -mutation_ratio <mutation_ratio>...</a></strong></dt>
 
@@ -538,7 +543,7 @@ substitutions or indels. Default: uniform 0 0</p>
 <p>Indicate the percentage of substitutions and the number of indels (insertions
 and deletions). For example, use '80 20' (4 substitutions for each indel) for
 Sanger reads. Note that this parameter has no effect unless you specify the
-<mutation_dist> option. Default: 80 20</p>
+<mutation_dist> option. Default: mutation_ratio.default</p>
 </dd>
 <dt><strong><a name="hd_homopolymer_dist_homopolymer_dist_homopolymer_dist" class="item">-hd <homopolymer_dist> | -homopolymer_dist <homopolymer_dist></a></strong></dt>
 
@@ -551,7 +556,7 @@ the homopolymer length n:</p>
   Margulies: N(n, 0.15 * n)              ,  Margulies et al. 2005.
   Richter  : N(n, 0.15 * sqrt(n))        ,  Richter et al. 2008.
   Balzer   : N(n, 0.03494 + n * 0.06856) ,  Balzer et al. 2010.</pre>
-<p>Default: 0</p>
+<p>Default: homopolymer_dist.default</p>
 </dd>
 <dt><strong><a name="cp_chimera_perc_chimera_perc_chimera_perc" class="item">-cp <chimera_perc> | -chimera_perc <chimera_perc></a></strong></dt>
 
@@ -559,7 +564,7 @@ the homopolymer length n:</p>
 <p>Specify the percent of reads in amplicon libraries that should be chimeric
 sequences. The 'reference' field in the description of chimeric reads will
 contain the ID of all the reference sequences forming the chimeric template. A
-typical value is 10%. Default: 0 %</p>
+typical value is 10%. Default: chimera_perc.default %</p>
 </dd>
 <dt><strong><a name="cd_chimera_dist_chimera_dist_chimera_dist" class="item">-cd <chimera_dist>... | -chimera_dist <chimera_dist>...</a></strong></dt>
 
@@ -568,7 +573,7 @@ typical value is 10%. Default: 0 %</p>
 multimeras of higher order. The default is the average values from Quince et al.
 2011: '314 38 1', which corresponds to 89% of bimeras, 11% of trimeras and 0.3%
 of quadrameras. Note that this option only takes effect when you request the
-generation of chimeras with the <chimera_perc> option. Default: 314 38 1</p>
+generation of chimeras with the <chimera_perc> option. Default: chimera_dist.default</p>
 </dd>
 <dt><strong><a name="ck_chimera_kmer_chimera_kmer_chimera_kmer" class="item">-ck <chimera_kmer> | -chimera_kmer <chimera_kmer></a></strong></dt>
 
@@ -585,8 +590,11 @@ breakpoints to be taken randomly on the "aligned" reference sequences.
 this option only takes effect when you request the generation of chimeras with
 the <chimera_perc> option. Also, this option is quite memory intensive, so you
 should probably limit yourself to a relatively small number of reference sequences
-if you want to use it. Default: 10 bp</p>
+if you want to use it. Default: chimera_kmer.default bp</p>
 </dd>
+</dl>
+<p>Community structure and diversity</p>
+<dl>
 <dt><strong><a name="af_abundance_file_abundance_file_abundance_file" class="item">-af <abundance_file> | -abundance_file <abundance_file></a></strong></dt>
 
 <dd>
@@ -606,14 +614,14 @@ this parameter is not specified, then it is randomly chosen. Examples:</p>
   uniform distribution: uniform
   powerlaw distribution with parameter 0.1: powerlaw 0.1
   exponential distribution with automatically chosen parameter: exponential</pre>
-<p>Default: uniform 1</p>
+<p>Default: abundance_model.default</p>
 </dd>
 <dt><strong><a name="nl_num_libraries_num_libraries_num_libraries" class="item">-nl <num_libraries> | -num_libraries <num_libraries></a></strong></dt>
 
 <dd>
 <p>Number of independent libraries to create. Specify how diverse and similar they
 should be with <diversity>, <shared_perc> and <permuted_perc>. Assign them
-different MID tags with <multiplex_mids>. Default: 1</p>
+different MID tags with <multiplex_mids>. Default: num_libraries.default</p>
 </dd>
 <dt><strong><a name="mi_multiplex_ids_multiplex_ids_multiplex_ids" class="item">-mi <multiplex_ids> | -multiplex_ids <multiplex_ids></a></strong></dt>
 
@@ -628,20 +636,23 @@ are included in the length specified with the -read_dist option.</p>
 <p>Richness, or number of reference sequences to include in the shotgun libraries.
 Use 0 for the maximum diversity possible (based on the number of reference sequences
 available). Provide one value to make all libraries have the same diversity, or
-one diversity value per library otherwise. Default: 0</p>
+one diversity value per library otherwise. Default: diversity.default</p>
 </dd>
 <dt><strong><a name="sp_shared_perc_shared_perc_shared_perc" class="item">-sp <shared_perc> | -shared_perc <shared_perc></a></strong></dt>
 
 <dd>
 <p>For multiple libraries, percent of reference sequences they should have in common
-(relative to the diversity of the least diverse library). Default: 0 %</p>
+(relative to the diversity of the least diverse library). Default: shared_perc.default %</p>
 </dd>
 <dt><strong><a name="pp_permuted_perc_permuted_perc_permuted_perc" class="item">-pp <permuted_perc> | -permuted_perc <permuted_perc></a></strong></dt>
 
 <dd>
 <p>For multiple libraries, percent of the most-abundant reference sequences to permute
-in rank-abundance. Default: 0 %</p>
+in rank-abundance. Default: permuted_perc.default %</p>
 </dd>
+</dl>
+<p>Miscellaneous</p>
+<dl>
 <dt><strong><a name="rs_random_seed_random_seed_random_seed" class="item">-rs <random_seed> | -random_seed <random_seed></a></strong></dt>
 
 <dd>
@@ -651,7 +662,7 @@ in rank-abundance. Default: 0 %</p>
 
 <dd>
 <p>Track read information (reference sequence, position, errors, ...) by writing
-it in the read description. Default: 1</p>
+it in the read description. Default: desc_track.default</p>
 </dd>
 <dt><strong><a name="ql_qual_levels_qual_levels_qual_levels" class="item">-ql <qual_levels>... | -qual_levels <qual_levels>...</a></strong></dt>
 
@@ -659,25 +670,26 @@ it in the read description. Default: 1</p>
 <p>Generate basic quality scores for the simulated reads. Good residues are given a
 specified good score (e.g. 30) and residues that are the result of an insertion
 or substitution are given a specified bad score (e.g. 10). Specify first the
-good score and then the bad score on the command-line, e.g.: 30 10. Default:</p>
+good score and then the bad score on the command-line, e.g.: 30 10. Default:
+qual_levels.default</p>
 </dd>
 <dt><strong><a name="fq_fastq_output_fastq_output_fastq_output" class="item">-fq <fastq_output> | -fastq_output <fastq_output></a></strong></dt>
 
 <dd>
 <p>Write the generated reads in FASTQ format (Sanger variant) instead of FASTA and
 QUAL. <qual_levels> need to be specified for this option to be effective. 
-Default: 0</p>
+Default: fastq_output.default</p>
 </dd>
 <dt><strong><a name="bn_base_name_base_name_base_name" class="item">-bn <base_name> | -base_name <base_name></a></strong></dt>
 
 <dd>
-<p>Prefix of the output files. Default: grinder</p>
+<p>Prefix of the output files. Default: base_name.default</p>
 </dd>
 <dt><strong><a name="od_output_dir_output_dir_output_dir" class="item">-od <output_dir> | -output_dir <output_dir></a></strong></dt>
 
 <dd>
 <p>Directory where the results should be written. This folder will be created if
-needed. Default: .</p>
+needed. Default: output_dir.default</p>
 </dd>
 <dt><strong><a name="pf_profile_file_profile_file_profile_file" class="item">-pf <profile_file> | -profile_file <profile_file></a></strong></dt>
 
@@ -825,7 +837,7 @@ a synopsis:</p>
 </p>
 <hr />
 <h1><a name="copyright">COPYRIGHT</a></h1>
-<p>Copyright 2009-2012 Florent ANGLY <<a href="mailto:florent.angly at gmail.com">florent.angly at gmail.com</a>></p>
+<p>Copyright 2009,2010,2011 Florent ANGLY <<a href="mailto:florent.angly at gmail.com">florent.angly at gmail.com</a>></p>
 <p>Grinder is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License (GPL) as published by
 the Free Software Foundation, either version 3 of the License, or
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..c6b9d5f
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,20 @@
+grinder (0.4.5-1) UNRELEASED; urgency=low
+
+  * New upstream version
+
+ -- Andreas Tille <tille at debian.org>  Thu, 03 May 2012 08:30:08 +0200
+
+grinder (0.4.4-2) unstable; urgency=low
+
+  * Team upload.
+
+  [ Florent Angly ]
+  * Added copyright information for files in inc/.
+
+ -- Charles Plessy <plessy at debian.org>  Sun, 29 Jan 2012 15:15:08 +0900
+
+grinder (0.4.4-1) REJECTED; urgency=low
+
+  * Initial Release. Closes: #654394
+
+ -- Florent Angly <florent.angly at gmail.com>  Fri, 20 Jan 2012 11:41:11 +1000
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..45a4fb7
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+8
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..5c1b2a1
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,36 @@
+Source: grinder
+Section: science
+Priority: optional
+Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
+Uploaders: Florent Angly <florent.angly at gmail.com>,
+ Andreas Tille <tille at debian.org>
+DM-Upload-Allowed: yes
+Build-Depends: debhelper (>= 8)
+Build-Depends-Indep: libbio-perl-perl,
+ libgetopt-euclid-perl (>= 0.2.8),
+ libmath-random-mt-perl (>= 1.13),
+ help2man
+Standards-Version: 3.9.2
+Homepage: http://sourceforge.net/projects/biogrinder/
+Vcs-Git: git://git.debian.org/debian-med/grinder.git
+Vcs-Browser: http://git.debian.org/?p=debian-med/grinder.git
+
+Package: grinder
+Architecture: all
+Depends: ${misc:Depends}, ${perl:Depends},
+ libbio-perl-perl,
+ libgetopt-euclid-perl (>= 0.2.8),
+ libmath-random-mt-perl (>= 1.13)
+Description: Versatile omics shotgun and amplicon sequencing read simulator
+ Grinder is a versatile program to create random shotgun and amplicon sequence
+ libraries based on DNA, RNA or proteic reference sequences provided in a
+ FASTA file.
+ .
+ Grinder can produce genomic, metagenomic, transcriptomic, metatranscriptomic,
+ proteomic, metaproteomic shotgun and amplicon datasets from current
+ sequencing technologies such as Sanger, 454, Illumina. These simulated
+ datasets can be used to test the accuracy of bioinformatic tools under
+ specific hypotheses, e.g. with or without sequencing errors, or with low or
+ high community diversity. Grinder may also be used to help decide between
+ alternative sequencing methods for a sequence-based project, e.g. should the
+ library be paired-end or not, how many reads should be sequenced.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..98c5647
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,60 @@
+Format: http://dep.debian.net/deps/dep5/
+Upstream-Name: Grinder
+Upstream-Contact: Florent Angly <florent.angly at gmail.com>
+Source: http://sourceforge.net/projects/biogrinder/files/
+
+Files: *
+Copyright: 2009-2011, Florent Angly <florent.angly at gmail.com>
+License: GPL-3+
+
+Files: debian/*
+Copyright: 2012, Florent Angly <florent.angly at gmail.com>
+License: GPL-3+
+
+Files: inc/Module/*
+Copyright: 2002-2011, Adam Kennedy <adamk at cpan.org>
+           2002-2011, Audrey Tang <autrijus at autrijus.org>
+           2002-2011, Brian Ingerson <ingy at cpan.org>
+License: Artistic or GPL-1+
+
+Files: inc/Module/Install/AuthorRequires.pm
+Copyright: 2009, Florian Ragwitz <rafl at debian.org>
+License: Artistic or GPL-1+
+
+Files: inc/Module/Install/AutoLicense.pm
+Copyright: 2010, Chris Williams <chris at bingosnet.co.uk>
+License: Artistic or GPL-1+
+
+Files: inc/Module/Install/AutoManifest.pm
+Copyright: 2008, Hans Dieter Pearcey <hdp at pobox.com>
+License: Artistic or GPL-1+
+
+Files: inc/Module/Install/ReadmeFromPod.pm
+Copyright: 2010, Chris Williams <chris at bingosnet.co.uk>
+License: Artistic or GPL-1+
+
+License: Artistic
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the Artistic License, which comes with Perl.
+ .
+ On Debian systems, the complete text of the Artistic License can be
+ found in `/usr/share/common-licenses/Artistic'.
+
+License: GPL-1+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+ .
+ On Debian systems, the complete text of version 1 of the GNU General
+ Public License can be found in `/usr/share/common-licenses/GPL-1'.
+
+License: GPL-3+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 3 dated June, 2007, or (at your
+ option) any later version.
+ .
+ On Debian systems, the complete text of version 3 of the GNU General
+ Public License can be found in `/usr/share/common-licenses/GPL-3'.
+
diff --git a/debian/gbp.conf b/debian/gbp.conf
new file mode 100644
index 0000000..4951fb1
--- /dev/null
+++ b/debian/gbp.conf
@@ -0,0 +1,7 @@
+[DEFAULT]
+pristine-tar = True
+
+[git-buildpackage]
+# use this for more svn-buildpackage like behaviour:
+export-dir = ../build-area/
+tarball-dir = ../tarballs/
diff --git a/debian/grinder.docs b/debian/grinder.docs
new file mode 100644
index 0000000..ea4c277
--- /dev/null
+++ b/debian/grinder.docs
@@ -0,0 +1,2 @@
+README.htm
+README
diff --git a/debian/grinder.manpages b/debian/grinder.manpages
new file mode 100644
index 0000000..13cdaf4
--- /dev/null
+++ b/debian/grinder.manpages
@@ -0,0 +1 @@
+debian/man/*.1
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..1c06364
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,55 @@
+#!/usr/bin/make -f
+
+PACKAGE = grinder
+AUTHOR  = Florent Angly <florent.angly at gmail.com>
+VERSION = 0.4.2
+SECTION = 1
+MANDIR  = debian/man
+
+NAME1   = $(PACKAGE)
+SCRIPT1 = script/$(NAME1)
+DESC1   = 'Versatile omics shotgun and amplicon sequencing read simulator'
+MAN1    = $(MANDIR)/$(NAME1).$(SECTION)
+
+NAME2   = average_genome_size
+SCRIPT2 = utils/$(NAME2)
+DESC2   = 'Calculate the average genome size in Grinder libraries'
+MAN2    = $(MANDIR)/$(NAME2).$(SECTION)
+
+NAME3   = change_paired_read_orientation
+SCRIPT3 = utils/$(NAME3)
+DESC3   = 'Reverses the orientation of each paired\-end FASTA sequence'
+MAN3    = $(MANDIR)/$(NAME3).$(SECTION)
+
+define MORE
+[see also]
+.BR grinder (7),
+.BR $(NAME1) ($(SECTION)),
+.BR $(NAME2) ($(SECTION))
+and
+.BR $(NAME3) ($(SECTION)).
+endef
+export MORE
+MOREFILE = $(MANDIR)/more.$(SECTION)
+
+
+%:
+	dh $@
+
+# Because of Grinder's random output, some tests are brittle and may fail...
+# Only test that the program loads successfully
+override_dh_auto_test:
+	$(MAKE) testdb TEST_FILE=t/00-load.t TESTDB_SW=''
+
+# Generate manpages on the fly
+override_dh_auto_build:
+	mkdir -p $(MANDIR)
+	echo "$$MORE" > $(MOREFILE)
+	help2man --no-discard-stderr --name $(DESC1) --section $(SECTION) --version-string $(VERSION) --no-info --include $(MOREFILE) --output $(MAN1) $(SCRIPT1)
+	help2man --no-discard-stderr --name $(DESC2) --section $(SECTION) --version-string $(VERSION) --no-info --include $(MOREFILE) --output $(MAN2) $(SCRIPT2)
+	help2man --no-discard-stderr --name $(DESC3) --section $(SECTION) --version-string $(VERSION) --no-info --include $(MOREFILE) --output $(MAN3) $(SCRIPT3)
+	rm $(MOREFILE)
+	dh_auto_build
+
+override_dh_auto_clean:
+	rm -rf $(MANDIR)
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..163aaf8
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 0000000..14308be
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,2 @@
+version=3
+http://sf.net/biogrinder/Grinder-v?(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz|zip)$
diff --git a/galaxy/grinder.xml b/galaxy/grinder.xml
index 8da6d55..3177805 100644
--- a/galaxy/grinder.xml
+++ b/galaxy/grinder.xml
@@ -1,4 +1,4 @@
-<tool id="grinder" name="Grinder" version="0.4.5">
+<tool id="grinder" name="Grinder" version="0.4.4">
 
   <description>versatile omic shotgun and amplicon read simulator</description>
 
diff --git a/inc/Module/Install/ReadmeFromPod.pm b/inc/Module/Install/ReadmeFromPod.pm
index fecda2b..348531e 100644
--- a/inc/Module/Install/ReadmeFromPod.pm
+++ b/inc/Module/Install/ReadmeFromPod.pm
@@ -7,119 +7,29 @@ use warnings;
 use base qw(Module::Install::Base);
 use vars qw($VERSION);
 
-$VERSION = '0.16';
+$VERSION = '0.12';
 
 sub readme_from {
   my $self = shift;
   return unless $self->is_admin;
 
-  # Input file
-  my $in_file  = shift || $self->_all_from
+  my $file = shift || $self->_all_from
     or die "Can't determine file to make readme_from";
+  my $clean = shift;
 
-  # Get optional arguments
-  my ($clean, $format, $out_file, $options);
-  my $args = shift;
-  if ( ref $args ) {
-    # Arguments are in a hashref
-    if ( ref($args) ne 'HASH' ) {
-      die "Expected a hashref but got a ".ref($args)."\n";
-    } else {
-      $clean    = $args->{'clean'};
-      $format   = $args->{'format'};
-      $out_file = $args->{'output_file'};
-      $options  = $args->{'options'};
-    }
-  } else {
-    # Arguments are in a list
-    $clean    = $args;
-    $format   = shift;
-    $out_file = shift;
-    $options  = \@_;
-  }
-
-  # Default values;
-  $clean  ||= 0;
-  $format ||= 'txt';
-
-  # Generate README
-  print "readme_from $in_file to $format\n";
-  if ($format =~ m/te?xt/) {
-    $out_file = $self->_readme_txt($in_file, $out_file, $options);
-  } elsif ($format =~ m/html?/) {
-    $out_file = $self->_readme_htm($in_file, $out_file, $options);
-  } elsif ($format eq 'man') {
-    $out_file = $self->_readme_man($in_file, $out_file, $options);
-  } elsif ($format eq 'pdf') {
-    $out_file = $self->_readme_pdf($in_file, $out_file, $options);
-  }
+  print "Writing README from $file\n";
 
+  require Pod::Text;
+  my $parser = Pod::Text->new();
+  open README, '> README' or die "$!\n";
+  $parser->output_fh( *README );
+  $parser->parse_file( $file );
   if ($clean) {
-    $self->clean_files($out_file);
+    $self->clean_files('README');
   }
-
   return 1;
 }
 
-
-sub _readme_txt {
-  my ($self, $in_file, $out_file, $options) = @_;
-  $out_file ||= 'README';
-  require Pod::Text;
-  my $parser = Pod::Text->new( @$options );
-  open my $out_fh, '>', $out_file or die "Could not write file $out_file:\n$!\n";
-  $parser->output_fh( *$out_fh );
-  $parser->parse_file( $in_file );
-  close $out_fh;
-  return $out_file;
-}
-
-
-sub _readme_htm {
-  my ($self, $in_file, $out_file, $options) = @_;
-  $out_file ||= 'README.htm';
-  require Pod::Html;
-  Pod::Html::pod2html(
-    "--infile=$in_file",
-    "--outfile=$out_file",
-    @$options,
-  );
-  # Remove temporary files if needed
-  for my $file ('pod2htmd.tmp', 'pod2htmi.tmp') {
-    if (-e $file) {
-      unlink $file or warn "Warning: Could not remove file '$file'.\n$!\n";
-    }
-  }
-  return $out_file;
-}
-
-
-sub _readme_man {
-  my ($self, $in_file, $out_file, $options) = @_;
-  $out_file ||= 'README.1';
-  require Pod::Man;
-  my $parser = Pod::Man->new( @$options );
-  $parser->parse_from_file($in_file, $out_file);
-  return $out_file;
-}
-
-
-sub _readme_pdf {
-  my ($self, $in_file, $out_file, $options) = @_;
-  $out_file ||= 'README.pdf';
-  eval { require App::pod2pdf; }
-    or die "Could not generate $out_file because pod2pdf could not be found\n";
-  my $parser = App::pod2pdf->new( @$options );
-  $parser->parse_from_file($in_file);
-  open my $out_fh, '>', $out_file or die "Could not write file $out_file:\n$!\n";
-  select $out_fh;
-  $parser->output;
-  select STDOUT;
-  close $out_fh;
-  return $out_file;
-}
-
-
 sub _all_from {
   my $self = shift;
   return unless $self->admin->{extensions};
@@ -134,5 +44,5 @@ sub _all_from {
 
 __END__
 
-#line 254
+#line 112
 
diff --git a/lib/Grinder.pm b/lib/Grinder.pm
index 2c5525d..8e08d80 100644
--- a/lib/Grinder.pm
+++ b/lib/Grinder.pm
@@ -1,4 +1,4 @@
-# This file is part of the Grinder package, copyright 2009-2012
+# This file is part of the Grinder package, copyright 2009,2010,2011,2012
 # Florent Angly <florent.angly at gmail.com>, under the GPLv3 license
 
 package Grinder;
@@ -12,11 +12,10 @@ use Bio::SeqIO;
 use Grinder::KmerCollection;
 use Bio::Seq::SimulatedRead;
 use Math::Random::MT qw(srand rand);
-use Getopt::Euclid qw(:minimal_keys :defer);
+use Getopt::Euclid qw( :minimal_keys :defer );
 
 
-use version; our $VERSION = version->declare('0.4.5');
-
+our $VERSION = '0.4.4';
 
 #---------- GRINDER POD DOC ---------------------------------------------------#
 
@@ -144,7 +143,7 @@ Available from L<http://dx.doi.org/10.1371/journal.pcbi.1000593>.
 
 =head1 VERSION
 
-0.4.5
+0.4.4
 
 =head1 AUTHOR
 
@@ -180,10 +179,15 @@ for you:
 
 =item *
 
-Bio::SeqIO, Bio::Root::Root, Bio::Seq::SimulatedRead
+Bio::SeqIO
 
-Part of the Bioperl package. Bio::Seq::SimulatedReads has not not been released
-yet and is therefore included here.
+Part of the Bioperl package
+
+=item *
+
+Bio::Seq::SimulatedRead
+
+Part of Bioperl but included here because it has not been released yet
 
 =item *
 
@@ -369,24 +373,18 @@ around 2.5 kbp and has 0.2 kbp standard deviation
 
 A transcriptomic dataset
 
-   grinder -reference_file transcripts.fna
-
-=item 13.
-
-A unidirectional transcriptomic dataset
-
    grinder -reference_file transcripts.fna -unidirectional 1
 
 Note the use of -unidirectional 1 to prevent reads from being taken from the reverse-
 complement of the reference sequences.
 
-=item 14.
+=item 13.
 
 A proteomic dataset
 
    grinder -reference_file proteins.faa -unidirectional 1
 
-=item 15.
+=item 14.
 
 A 16S rRNA amplicon library
 
@@ -395,20 +393,20 @@ A 16S rRNA amplicon library
 Note the use of -length_bias 0 because reference sequence length should not affect
 the relative abundance of amplicons.
 
-=item 16.
+=item 15.
 
 The same amplicon library with 20% of chimeric reads (90% bimera, 10% trimera)
 
    grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1 -chimera_perc 20 -chimera_dist 90 10
 
-=item 17.
+=item 16.
 
 Three 16S rRNA amplicon libraries with specified MIDs and no reference sequences
 in common
 
    grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1 -num_libraries 3 -multiplex_ids MIDs.fna
 
-=item 18.
+=item 17.
 
 Reading reference sequences from the standard input, which allows you to
 decompress FASTA files on the fly:
@@ -560,7 +558,7 @@ and 1392R primers that target the V6 to V9 region of the 16S rRNA gene.
 Instead of producing reads bidirectionally, from the reference strand and its
 reverse complement, proceed unidirectionally, from one strand only (forward or
 reverse). Values: 0 (off, i.e. bidirectional), 1 (forward), -1 (reverse). Use
-<unidirectional> = 1 for strand-specific transcriptomic or proteomic datasets.
+<unidirectional> = 1 for strand specific transcriptomic or proteomic datasets.
 Default: unidirectional.default
 
 =for Euclid:
@@ -999,7 +997,7 @@ Returns : seed number
 
 =head1 COPYRIGHT
 
-Copyright 2009-2012 Florent ANGLY <florent.angly at gmail.com>
+Copyright 2009,2010,2011 Florent ANGLY <florent.angly at gmail.com>
 
 Grinder is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License (GPL) as published by
@@ -1427,16 +1425,10 @@ sub initialize {
     $self->{num_libraries}) if defined $self->{multiplex_ids};
 
   # Import genome sequences
-  my $min_seq_len;
-  if ($self->{chimera_dist_cdf}) {
-    # Each chimera needs >= 1 bp. Use # sequences required by largest chimera.
-    $min_seq_len = scalar @{$self->{chimera_dist}} + 1;
-  } else {
-    $min_seq_len = 1;
-  }
+  my $max_chimera_size = scalar @{$self->{chimera_dist}} + 1; # nof seqs in largest chimera
   $self->{database} = $self->database_create( $self->{reference_file},
     $self->{unidirectional}, $self->{forward_reverse}, $self->{abundance_file},
-    $self->{delete_chars}, $min_seq_len );
+    $self->{delete_chars}, $max_chimera_size );
 
   $self->initialize_alphabet;
   if ( ($self->{alphabet} eq 'protein')     &&
@@ -2247,7 +2239,7 @@ sub rand_seq_chimera {
     if ($self->{chimera_kmer}) {
       @pos = $self->kmer_chimera_fragments($m);
     } else {
-      # TODO: try to not provide $positions and $oids
+      #### TODO: try to not provide $positions and $oids
       @pos = $self->rand_chimera_fragments($m, $sequence, $positions, $oids);
     }
 
@@ -2374,7 +2366,7 @@ sub rand_kmer_chimera_extend {
     if (defined $seqid2) {
 
       # Pick a random breakpoint
-      # TODO: can we prefer a position not too crazy?
+      #### TODO: can we prefer a position not too crazy?
       my $pos1 = $self->rand_kmer_start( $kmer, $seqid1, $start1 );
       my $pos2 = $self->rand_kmer_start( $kmer, $seqid2 );
 
@@ -2663,7 +2655,7 @@ sub rand_point_errors {
   my ($self, $seq_str, $error_specs) = @_;
 
   # Mutation cumulative density functions (cdf) for this sequence length
-  my $seq_len = length $seq_str;
+  my $seq_len = length($seq_str);
   if ( not defined $self->{mutation_cdf}->{$seq_len} ) {
     my $mut_pdf  = []; # probability density function
     my $mut_freq =  0; # average
@@ -2681,18 +2673,13 @@ sub rand_point_errors {
       # Linear error model
       # para 1 is the error rate at the 5' end of the read
       # para 2 is the error rate at the 3' end
-      $mut_freq = abs( $self->{mutation_para2} + $self->{mutation_para1} ) / 2;
-      if ($seq_len == 1) {
-        $$mut_pdf[0] = $mut_freq;
-        $mut_sum = $mut_freq
-      } elsif ($seq_len > 1) {
-        my $slope = ($self->{mutation_para2} - $self->{mutation_para1}) / ($seq_len-1);
-        for my $i (0 .. $seq_len-1) {
-          my $val = $self->{mutation_para1} + $i * $slope;
-          $mut_sum += $val;
-          $$mut_pdf[$i] = $val;
-        }
+      my $slope = ($self->{mutation_para2} - $self->{mutation_para1}) / ($seq_len-1);
+      for my $i (0 .. $seq_len-1) {
+        my $val = $self->{mutation_para1} + $i * $slope;
+        $mut_sum += $val;
+        $$mut_pdf[$i] = $val;
       }
+      $mut_freq = abs( $self->{mutation_para2} + $self->{mutation_para1} ) / 2;
       
     } elsif ($self->{mutation_model} eq 'poly4') {
       # Fourth degree polynomial error model: e = para1 + para2 * i**4
@@ -2963,8 +2950,6 @@ sub database_create {
   my %seq_ids;     # hash of reference sequence IDs and IDs of their amplicons
   my %mol_types;    # hash of count of molecule types (dna, rna, protein)
   while ( my $ref_seq = <$in> ) {
-    # Skip empty sequences
-    next if not $ref_seq->seq;
     # Record molecule type
     $mol_types{$ref_seq->alphabet}++;
     # Skip unwanted sequences
@@ -3001,14 +2986,6 @@ sub database_create {
   }
   undef $in; # close the filehandle (maybe?!)
 
-  # Error if no usable sequences in the database
-  if (scalar keys %seq_ids == 0) {
-    die "Error: No genome sequences could be used. If you specified a file of".
-      " abundances for the genome sequences, make sure that their ID match the".
-      " ID in the FASTA file. If you specified amplicon primers, verify that ".
-      "they match some genome sequences.\n";
-  }
-
   # Determine database type: dna, rna, protein
   my $db_alphabet = $self->database_get_mol_type(\%mol_types);
   $self->{alphabet} = $db_alphabet;
@@ -3024,6 +3001,14 @@ sub database_create {
       "<unidirectional> = 1 with proteic reference sequences\n";
   }
 
+  # Error if no usable sequences in the database
+  if (scalar keys %seq_ids == 0) {
+    die "Error: No genome sequences could be used. If you specified a file of".
+      " abundances for the genome sequences, make sure that their ID match the".
+      " ID in the FASTA file. If you specified amplicon primers, verify that ".
+      "they match some genome sequences.\n";
+  }
+
   my $database = { 'db' => \%seq_db, 'ids' => \%seq_ids };
   return $database;
 }
@@ -3065,46 +3050,25 @@ sub database_extract_amplicons {
   # several 16S rRNA genes. Extract all amplicons from a sequence (both strands)
   # but take only the shortest when amplicons are nested.
   # Fetch amplicons from both strands
-
-  # Get amplicons from forward and reverse strand
-  my $fwd_amplicons = database_extract_amplicons_from_strand($seq, $forward_regexp, $reverse_regexp, 1);
-  my $rev_amplicons = database_extract_amplicons_from_strand($seq, $forward_regexp, $reverse_regexp, -1);
-
-  # Deal with nested amplicons by removing the longest of the two
-  my $re = qr/(\d+)\.\.(\d+)/;
-  for (my $rev = 0; $rev < scalar @$rev_amplicons; $rev++) {
-    my ($rev_start, $rev_end) = ( $rev_amplicons->[$rev]->{_amplicon} =~ m/$re/ );
-    for (my $fwd = 0; $fwd < scalar @$fwd_amplicons; $fwd++) {
-      my ($fwd_start, $fwd_end) = ( $fwd_amplicons->[$fwd]->{_amplicon} =~ m/$re/ );
-      if ( ($fwd_start < $rev_start) && ($rev_end < $fwd_end) ) {
-        splice @$fwd_amplicons, $fwd, 1; # Remove forward amplicon
-        $fwd--;
-        next;
-      }
-      if ( ($rev_start < $fwd_start) && ($fwd_end < $rev_end) ) {
-        splice @$rev_amplicons, $rev, 1; # Remove reverse amplicon
-        $rev--;
-      }
-    }
+  my @amplicons;
+  for my $orientation (1, -1) {
+    my $strand_amplicons = database_extract_amplicons_from_strand($seq,
+      $forward_regexp, $reverse_regexp, $orientation);  
+    push @amplicons, @$strand_amplicons if defined $strand_amplicons;
   }
-  
-  my $amplicons = [ @$fwd_amplicons, @$rev_amplicons ];
-
   # Complain if primers did not match explicitly specified reference sequence
   my $seqid = $seq->id;
   if ( (scalar keys %{$ids_to_keep} > 0) &&
        (exists $$ids_to_keep{$seqid}   ) &&
-       (scalar @$amplicons == 0         ) ) {
+       (scalar @amplicons == 0         ) ) {
     die "Error: Requested sequence $seqid did not match the specified forward primer.\n";
   }
-
-  return $amplicons;
+  return \@amplicons;
 }
 
 
 sub database_extract_amplicons_from_strand {
-  # Get amplicons from the given strand (orientation) of the given sequence.
-  # For nested amplicons, only the shortest is returned to mimic PCR.
+  # Get amplicons from the given strand (orientation) of the given sequence
   my ($seq, $forward_regexp, $reverse_regexp, $orientation) = @_;
 
   # Reverse-complement sequence if looking at a -1 orientation
@@ -3118,7 +3082,7 @@ sub database_extract_amplicons_from_strand {
   }
 
   # Get amplicons from sequence string
-  my $amplicons = [];
+  my $amplicons;
   if ( (defined $forward_regexp) && (not defined $reverse_regexp) ) {
     while ( $seqstr =~ m/($forward_regexp)/g ) {
       my $start = pos($seqstr) - length($1) + 1;
@@ -3149,15 +3113,13 @@ sub database_create_amplicon {
   my ($seq, $start, $end, $orientation) = @_;
   my $amplicon;
   my $coord;
-
   if ($orientation == -1) {
     # Calculate coordinates relative to forward strand. For example, given a
     # read starting at 10 and ending at 23 on the reverse complement of a 100 bp
     # sequence, return complement(77..90).
-    $amplicon = $seq->revcom->trunc($start, $end);
-    my $seq_len = $seq->length;
-    $start = $seq_len - $start + 1;
-    $end   = $seq_len - $end + 1;
+    $amplicon = $seq->trunc($start, $end)->revcom;
+    $start = $seq->length - $start + 1;
+    $end   = $seq->length - $end + 1;
     ($start, $end) = ($end, $start);
     $coord = "complement($start..$end)";
   } else {
@@ -3165,7 +3127,6 @@ sub database_create_amplicon {
     $coord = "$start..$end";
   }
   $amplicon->{_amplicon} = $coord;
-
   return $amplicon
 }
 
diff --git a/man/average_genome_size.1 b/man/average_genome_size.1
deleted file mode 100644
index 4d1649a..0000000
--- a/man/average_genome_size.1
+++ /dev/null
@@ -1,172 +0,0 @@
-.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.19)
-.\"
-.\" Standard preamble:
-.\" ========================================================================
-.de Sp \" Vertical space (when we can't use .PP)
-.if t .sp .5v
-.if n .sp
-..
-.de Vb \" Begin verbatim text
-.ft CW
-.nf
-.ne \\$1
-..
-.de Ve \" End verbatim text
-.ft R
-.fi
-..
-.\" Set up some character translations and predefined strings.  \*(-- will
-.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
-.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
-.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
-.\" nothing in troff, for use with C<>.
-.tr \(*W-
-.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
-.ie n \{\
-.    ds -- \(*W-
-.    ds PI pi
-.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
-.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
-.    ds L" ""
-.    ds R" ""
-.    ds C` ""
-.    ds C' ""
-'br\}
-.el\{\
-.    ds -- \|\(em\|
-.    ds PI \(*p
-.    ds L" ``
-.    ds R" ''
-'br\}
-.\"
-.\" Escape single quotes in literal strings from groff's Unicode transform.
-.ie \n(.g .ds Aq \(aq
-.el       .ds Aq '
-.\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
-.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
-.\" entries marked with X<> in POD.  Of course, you'll have to process the
-.\" output yourself in some meaningful fashion.
-.ie \nF \{\
-.    de IX
-.    tm Index:\\$1\t\\n%\t"\\$2"
-..
-.    nr % 0
-.    rr F
-.\}
-.el \{\
-.    de IX
-..
-.\}
-.\"
-.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
-.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
-.    \" fudge factors for nroff and troff
-.if n \{\
-.    ds #H 0
-.    ds #V .8m
-.    ds #F .3m
-.    ds #[ \f1
-.    ds #] \fP
-.\}
-.if t \{\
-.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
-.    ds #V .6m
-.    ds #F 0
-.    ds #[ \&
-.    ds #] \&
-.\}
-.    \" simple accents for nroff and troff
-.if n \{\
-.    ds ' \&
-.    ds ` \&
-.    ds ^ \&
-.    ds , \&
-.    ds ~ ~
-.    ds /
-.\}
-.if t \{\
-.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
-.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
-.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
-.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
-.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
-.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
-.\}
-.    \" troff and (daisy-wheel) nroff accents
-.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
-.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
-.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
-.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
-.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
-.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
-.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
-.ds ae a\h'-(\w'a'u*4/10)'e
-.ds Ae A\h'-(\w'A'u*4/10)'E
-.    \" corrections for vroff
-.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
-.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
-.    \" for low resolution devices (crt and lpr)
-.if \n(.H>23 .if \n(.V>19 \
-\{\
-.    ds : e
-.    ds 8 ss
-.    ds o a
-.    ds d- d\h'-1'\(ga
-.    ds D- D\h'-1'\(hy
-.    ds th \o'bp'
-.    ds Th \o'LP'
-.    ds ae ae
-.    ds Ae AE
-.\}
-.rm #[ #] #H #V #F C
-.\" ========================================================================
-.\"
-.IX Title "AVERAGE_GENOME_SIZE 1"
-.TH AVERAGE_GENOME_SIZE 1 "2012-02-08" "perl v5.14.2" "User Contributed Perl Documentation"
-.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
-.\" way too many mistakes in technical documents.
-.if n .ad l
-.nh
-.SH "NAME"
-average_genome_size \- Calculate the average genome size (in bp) of species in a Grinder library
-.SH "DESCRIPTION"
-.IX Header "DESCRIPTION"
-Calculate the average genome size (in bp) of species in a Grinder library given
-the library composition and the full-genomes used to produce it.
-.SH "REQUIRED ARGUMENTS"
-.IX Header "REQUIRED ARGUMENTS"
-.IP "<db_fasta>" 4
-.IX Item "<db_fasta>"
-\&\s-1FASTA\s0 file containing the full-genomes used to produce the Grinder library.
-.IP "<rank_file>" 4
-.IX Item "<rank_file>"
-Grinder rank file that describes the library composition.
-.SH "COPYRIGHT"
-.IX Header "COPYRIGHT"
-Copyright 2009\-2012 Florent \s-1ANGLY\s0 <florent.angly at gmail.com>
-.PP
-Grinder is free software: you can redistribute it and/or modify
-it under the terms of the \s-1GNU\s0 General Public License (\s-1GPL\s0) as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-Grinder is distributed in the hope that it will be useful,
-but \s-1WITHOUT\s0 \s-1ANY\s0 \s-1WARRANTY\s0; without even the implied warranty of
-\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS\s0 \s-1FOR\s0 A \s-1PARTICULAR\s0 \s-1PURPOSE\s0.  See the
-\&\s-1GNU\s0 General Public License for more details.
-You should have received a copy of the \s-1GNU\s0 General Public License
-along with Grinder.  If not, see <http://www.gnu.org/licenses/>.
-.SH "BUGS"
-.IX Header "BUGS"
-All complex software has bugs lurking in it, and this program is no exception.
-If you find a bug, please report it on the SourceForge Tracker for Grinder:
-<http://sourceforge.net/tracker/?group_id=244196&atid=1124737>
-.PP
-Bug reports, suggestions and patches are welcome. Grinder's code is developed
-on Sourceforge (<http://sourceforge.net/scm/?type=git&group_id=244196>) and is
-under Git revision control. To get started with a patch, do:
-.PP
-.Vb 1
-\&   git clone git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-.Ve
diff --git a/man/change_paired_read_orientation.1 b/man/change_paired_read_orientation.1
deleted file mode 100644
index 3fd6b92..0000000
--- a/man/change_paired_read_orientation.1
+++ /dev/null
@@ -1,173 +0,0 @@
-.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.19)
-.\"
-.\" Standard preamble:
-.\" ========================================================================
-.de Sp \" Vertical space (when we can't use .PP)
-.if t .sp .5v
-.if n .sp
-..
-.de Vb \" Begin verbatim text
-.ft CW
-.nf
-.ne \\$1
-..
-.de Ve \" End verbatim text
-.ft R
-.fi
-..
-.\" Set up some character translations and predefined strings.  \*(-- will
-.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
-.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
-.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
-.\" nothing in troff, for use with C<>.
-.tr \(*W-
-.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
-.ie n \{\
-.    ds -- \(*W-
-.    ds PI pi
-.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
-.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
-.    ds L" ""
-.    ds R" ""
-.    ds C` ""
-.    ds C' ""
-'br\}
-.el\{\
-.    ds -- \|\(em\|
-.    ds PI \(*p
-.    ds L" ``
-.    ds R" ''
-'br\}
-.\"
-.\" Escape single quotes in literal strings from groff's Unicode transform.
-.ie \n(.g .ds Aq \(aq
-.el       .ds Aq '
-.\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
-.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
-.\" entries marked with X<> in POD.  Of course, you'll have to process the
-.\" output yourself in some meaningful fashion.
-.ie \nF \{\
-.    de IX
-.    tm Index:\\$1\t\\n%\t"\\$2"
-..
-.    nr % 0
-.    rr F
-.\}
-.el \{\
-.    de IX
-..
-.\}
-.\"
-.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
-.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
-.    \" fudge factors for nroff and troff
-.if n \{\
-.    ds #H 0
-.    ds #V .8m
-.    ds #F .3m
-.    ds #[ \f1
-.    ds #] \fP
-.\}
-.if t \{\
-.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
-.    ds #V .6m
-.    ds #F 0
-.    ds #[ \&
-.    ds #] \&
-.\}
-.    \" simple accents for nroff and troff
-.if n \{\
-.    ds ' \&
-.    ds ` \&
-.    ds ^ \&
-.    ds , \&
-.    ds ~ ~
-.    ds /
-.\}
-.if t \{\
-.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
-.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
-.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
-.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
-.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
-.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
-.\}
-.    \" troff and (daisy-wheel) nroff accents
-.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
-.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
-.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
-.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
-.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
-.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
-.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
-.ds ae a\h'-(\w'a'u*4/10)'e
-.ds Ae A\h'-(\w'A'u*4/10)'E
-.    \" corrections for vroff
-.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
-.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
-.    \" for low resolution devices (crt and lpr)
-.if \n(.H>23 .if \n(.V>19 \
-\{\
-.    ds : e
-.    ds 8 ss
-.    ds o a
-.    ds d- d\h'-1'\(ga
-.    ds D- D\h'-1'\(hy
-.    ds th \o'bp'
-.    ds Th \o'LP'
-.    ds ae ae
-.    ds Ae AE
-.\}
-.rm #[ #] #H #V #F C
-.\" ========================================================================
-.\"
-.IX Title "CHANGE_PAIRED_READ_ORIENTATION 1"
-.TH CHANGE_PAIRED_READ_ORIENTATION 1 "2012-02-08" "perl v5.14.2" "User Contributed Perl Documentation"
-.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
-.\" way too many mistakes in technical documents.
-.if n .ad l
-.nh
-.SH "NAME"
-change_paired_read_orientation \- Change the orientation of paired\-end reads in a
-FASTA file
-.SH "DESCRIPTION"
-.IX Header "DESCRIPTION"
-Reverse the orientation, i.e. reverse-complement each right-hand paired-end read
-(\s-1ID\s0 ending in /2) in a \s-1FASTA\s0 file.
-.SH "REQUIRED ARGUMENTS"
-.IX Header "REQUIRED ARGUMENTS"
-.IP "<in_fasta>" 4
-.IX Item "<in_fasta>"
-\&\s-1FASTA\s0 file containing the reads to re-orient.
-.IP "<out_fasta>" 4
-.IX Item "<out_fasta>"
-Output \s-1FASTA\s0 file where to write the reads.
-.SH "COPYRIGHT"
-.IX Header "COPYRIGHT"
-Copyright 2009\-2012 Florent \s-1ANGLY\s0 <florent.angly at gmail.com>
-.PP
-Grinder is free software: you can redistribute it and/or modify
-it under the terms of the \s-1GNU\s0 General Public License (\s-1GPL\s0) as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-Grinder is distributed in the hope that it will be useful,
-but \s-1WITHOUT\s0 \s-1ANY\s0 \s-1WARRANTY\s0; without even the implied warranty of
-\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS\s0 \s-1FOR\s0 A \s-1PARTICULAR\s0 \s-1PURPOSE\s0.  See the
-\&\s-1GNU\s0 General Public License for more details.
-You should have received a copy of the \s-1GNU\s0 General Public License
-along with Grinder.  If not, see <http://www.gnu.org/licenses/>.
-.SH "BUGS"
-.IX Header "BUGS"
-All complex software has bugs lurking in it, and this program is no exception.
-If you find a bug, please report it on the SourceForge Tracker for Grinder:
-<http://sourceforge.net/tracker/?group_id=244196&atid=1124737>
-.PP
-Bug reports, suggestions and patches are welcome. Grinder's code is developed
-on Sourceforge (<http://sourceforge.net/scm/?type=git&group_id=244196>) and is
-under Git revision control. To get started with a patch, do:
-.PP
-.Vb 1
-\&   git clone git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-.Ve
diff --git a/man/grinder.1 b/man/grinder.1
deleted file mode 100644
index b7c4541..0000000
--- a/man/grinder.1
+++ /dev/null
@@ -1,863 +0,0 @@
-.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.19)
-.\"
-.\" Standard preamble:
-.\" ========================================================================
-.de Sp \" Vertical space (when we can't use .PP)
-.if t .sp .5v
-.if n .sp
-..
-.de Vb \" Begin verbatim text
-.ft CW
-.nf
-.ne \\$1
-..
-.de Ve \" End verbatim text
-.ft R
-.fi
-..
-.\" Set up some character translations and predefined strings.  \*(-- will
-.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
-.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
-.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
-.\" nothing in troff, for use with C<>.
-.tr \(*W-
-.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
-.ie n \{\
-.    ds -- \(*W-
-.    ds PI pi
-.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
-.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
-.    ds L" ""
-.    ds R" ""
-.    ds C` ""
-.    ds C' ""
-'br\}
-.el\{\
-.    ds -- \|\(em\|
-.    ds PI \(*p
-.    ds L" ``
-.    ds R" ''
-'br\}
-.\"
-.\" Escape single quotes in literal strings from groff's Unicode transform.
-.ie \n(.g .ds Aq \(aq
-.el       .ds Aq '
-.\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
-.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
-.\" entries marked with X<> in POD.  Of course, you'll have to process the
-.\" output yourself in some meaningful fashion.
-.ie \nF \{\
-.    de IX
-.    tm Index:\\$1\t\\n%\t"\\$2"
-..
-.    nr % 0
-.    rr F
-.\}
-.el \{\
-.    de IX
-..
-.\}
-.\"
-.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
-.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
-.    \" fudge factors for nroff and troff
-.if n \{\
-.    ds #H 0
-.    ds #V .8m
-.    ds #F .3m
-.    ds #[ \f1
-.    ds #] \fP
-.\}
-.if t \{\
-.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
-.    ds #V .6m
-.    ds #F 0
-.    ds #[ \&
-.    ds #] \&
-.\}
-.    \" simple accents for nroff and troff
-.if n \{\
-.    ds ' \&
-.    ds ` \&
-.    ds ^ \&
-.    ds , \&
-.    ds ~ ~
-.    ds /
-.\}
-.if t \{\
-.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
-.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
-.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
-.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
-.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
-.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
-.\}
-.    \" troff and (daisy-wheel) nroff accents
-.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
-.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
-.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
-.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
-.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
-.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
-.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
-.ds ae a\h'-(\w'a'u*4/10)'e
-.ds Ae A\h'-(\w'A'u*4/10)'E
-.    \" corrections for vroff
-.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
-.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
-.    \" for low resolution devices (crt and lpr)
-.if \n(.H>23 .if \n(.V>19 \
-\{\
-.    ds : e
-.    ds 8 ss
-.    ds o a
-.    ds d- d\h'-1'\(ga
-.    ds D- D\h'-1'\(hy
-.    ds th \o'bp'
-.    ds Th \o'LP'
-.    ds ae ae
-.    ds Ae AE
-.\}
-.rm #[ #] #H #V #F C
-.\" ========================================================================
-.\"
-.IX Title "GRINDER 1"
-.TH GRINDER 1 "2012-02-12" "perl v5.14.2" "User Contributed Perl Documentation"
-.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
-.\" way too many mistakes in technical documents.
-.if n .ad l
-.nh
-.SH "NAME"
-grinder \- A versatile omics shotgun and amplicon sequencing read simulator
-.SH "DESCRIPTION"
-.IX Header "DESCRIPTION"
-Grinder is a versatile program to create random shotgun and amplicon sequence
-libraries based on \s-1DNA\s0, \s-1RNA\s0 or proteic reference sequences provided in a \s-1FASTA\s0
-file.
-.PP
-Grinder can produce genomic, metagenomic, transcriptomic, metatranscriptomic,
-proteomic, metaproteomic shotgun and amplicon datasets from current sequencing
-technologies such as Sanger, 454, Illumina. These simulated datasets can be used
-to test the accuracy of bioinformatic tools under specific hypothesis, e.g. with
-or without sequencing errors, or with low or high community diversity. Grinder
-may also be used to help decide between alternative sequencing methods for a
-sequence-based project, e.g. should the library be paired-end or not, how many
-reads should be sequenced.
-.PP
-Grinder features include:
-.IP "\(bu" 4
-shotgun or amplicon read libraries
-.IP "\(bu" 4
-omics support to generate genomic, transcriptomic, proteomic,
-metagenomic, metatranscriptomic or metaproteomic datasets
-.IP "\(bu" 4
-arbitrary read length distribution and number of reads
-.IP "\(bu" 4
-simulation of \s-1PCR\s0 and sequencing errors (chimeras, point mutations, homopolymers)
-.IP "\(bu" 4
-support for paired-end (mate pair) datasets
-.IP "\(bu" 4
-specific rank-abundance settings or manually given abundance for each genome, gene or protein
-.IP "\(bu" 4
-creation of datasets with a given richness (alpha diversity)
-.IP "\(bu" 4
-independent datasets can share a variable number of genomes (beta diversity)
-.IP "\(bu" 4
-modeling of the bias created by varying genome lengths or gene copy number
-.IP "\(bu" 4
-profile mechanism to store preferred options
-.IP "\(bu" 4
-available to biologists or power users through multiple interfaces: \s-1GUI\s0, \s-1CLI\s0 and \s-1API\s0
-.PP
-Briefly, given a \s-1FASTA\s0 file containing reference sequences (genomes, genes,
-transcripts or proteins), Grinder performs the following steps:
-.IP "1." 4
-Read the reference sequences, and for amplicon datasets, extracts full-length
-reference \s-1PCR\s0 amplicons using the provided degenerate \s-1PCR\s0 primers.
-.IP "2." 4
-Determine the community structure based on the provided alpha diversity (number
-of reference sequences in the library), beta diversity (number of reference
-sequences in common between several independent libraries) and specified rank\-
-abundance model.
-.IP "3." 4
-Take shotgun reads from the reference sequences or amplicon reads from the full\-
-length reference \s-1PCR\s0 amplicons. The reads may be paired-end reads when an insert
-size distribution is specified. The length of the reads depends on the provided
-read length distribution and their abundance depends on the relative abundance
-in the community structure. Genome length may also bias the number of reads to
-take for shotgun datasets at this step. Similarly, for amplicon datasets, the
-number of copies of the target gene in the reference genomes may bias the number
-of reads to take.
-.IP "4." 4
-Alter reads by inserting sequencing errors (indels, substitutions and homopolymer
-errors) following a position-specific model to simulate reads created by current
-sequencing technologies (Sanger, 454, Illumina). Write the reads and their
-quality scores in \s-1FASTA\s0, \s-1QUAL\s0 and \s-1FASTQ\s0 files.
-.SH "CITATION"
-.IX Header "CITATION"
-If you use Grinder in your research, please cite:
-.PP
-.Vb 2
-\&   Angly FE, Willner D, Rohwer F, Hugenholtz P, Tyson GW (2011) Grinder: a
-\&   versatile sequence simulator for environmental shotgun and amplicon datasets
-.Ve
-.PP
-In review...
-.PP
-.Vb 3
-\&   Angly FE, Willner D, Prieto\-Davo\*' A, Edwards RA, Schmieder R, et al. (2009) The
-\&   GAAS Metagenomic Tool and Its Estimations of Viral and Microbial Average Genome
-\&   Size in Four Major Biomes. PLoS Comput Biol 5(12): e1000593.
-.Ve
-.PP
-Available from <http://dx.doi.org/10.1371/journal.pcbi.1000593>.
-.SH "VERSION"
-.IX Header "VERSION"
-This document refers to grinder version 0.4.5
-.SH "AUTHOR"
-.IX Header "AUTHOR"
-Florent Angly <florent.angly at gmail.com>
-.SH "INSTALLATION"
-.IX Header "INSTALLATION"
-.SS "Dependencies"
-.IX Subsection "Dependencies"
-You need to install these dependencies first:
-.IP "\(bu" 4
-Perl
-.Sp
-<http://www.perl.com/download.csp>
-.IP "\(bu" 4
-make
-.Sp
-Many systems have make installed by default. If your system does not, you should
-install the implementation of make of your choice, e.g. \s-1GNU\s0 make: <http://www.gnu.org/s/make/>
-.PP
-The following \s-1CPAN\s0 Perl modules are dependencies that will be installed automatically
-for you:
-.IP "\(bu" 4
-Bio::SeqIO, Bio::Root::Root, Bio::Seq::SimulatedRead
-.Sp
-Part of the Bioperl package. Bio::Seq::SimulatedRead has not been released
-yet and is therefore included here.
-.IP "\(bu" 4
-Getopt::Euclid (>= 0.2.8)
-.IP "\(bu" 4
-Math::Random::MT (>= 1.13)
-.SS "Procedure"
-.IX Subsection "Procedure"
-To install Grinder globally on your system, run the following commands in a
-terminal or command prompt:
-.PP
-On Linux, Unix, MacOS:
-.PP
-.Vb 2
-\&   perl Makefile.PL
-\&   make
-.Ve
-.PP
-And finally, with administrator privileges:
-.PP
-.Vb 1
-\&   make install
-.Ve
-.PP
-On Windows, run the same commands but with nmake instead of make.
-.SS "No administrator privileges?"
-.IX Subsection "No administrator privileges?"
-If you do not have administrator privileges, Grinder needs to be installed in
-your home directory.
-.PP
-First, follow the instructions to install local::lib
-at http://search.cpan.org/~apeiron/local\-lib\-1.008004/lib/local/lib.pm#The_bootstrapping_technique <http://search.cpan.org/~apeiron/local-lib-1.008004/lib/local/lib.pm#The_bootstrapping_technique>. After local::lib is installed, every Perl
-module that you install manually or through the \s-1CPAN\s0 command-line application
-will be installed in your home directory.
-.PP
-Then, install Grinder by following the instructions detailed in the \*(L"Procedure\*(R"
-section.
-.SH "RUNNING GRINDER"
-.IX Header "RUNNING GRINDER"
-After installation, you can run Grinder using a command-line interface (\s-1CLI\s0), 
-an application programming interface (\s-1API\s0) or a graphical user interface (\s-1GUI\s0)
-in Galaxy.
-.PP
-To get the usage of the \s-1CLI\s0, type:
-.PP
-.Vb 1
-\&  grinder \-\-help
-.Ve
-.PP
-More information, including the documentation of the Grinder \s-1API\s0, which allows
-you to run Grinder from within other Perl programs, is available by typing:
-.PP
-.Vb 1
-\&  perldoc Grinder
-.Ve
-.PP
-To run the \s-1GUI\s0, refer to the Galaxy documentation at <http://wiki.g2.bx.psu.edu/FrontPage>.
-.PP
-The 'utils' folder included in the Grinder package contains some utilities:
-.IP "average genome size:" 4
-.IX Item "average genome size:"
-This calculates the average genome size (in bp) of a simulated random library
-produced by Grinder.
-.IP "change_paired_read_orientation:" 4
-.IX Item "change_paired_read_orientation:"
-This reverses the orientation of each second mate-pair read (\s-1ID\s0 ending in /2)
-in a \s-1FASTA\s0 file.
-.SH "REFERENCE SEQUENCE DATABASE"
-.IX Header "REFERENCE SEQUENCE DATABASE"
-A variety of \s-1FASTA\s0 databases can be used as input for Grinder. For example, the
-GreenGenes database (<http://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/Isolated_named_strains_16S_aligned.fasta>)
-contains over 180,000 16S rRNA clone sequences from various species which would
-be appropriate to produce a 16S rRNA amplicon dataset. A set of over 41,000 \s-1OTU\s0
-representative sequences and their affiliation in seven different taxonomic
-systems can also be used for the same purpose (<http://greengenes.lbl.gov/Download/OTUs/gg_otus_6oct2010/rep_set/gg_97_otus_6oct2010.fasta>
-and <http://greengenes.lbl.gov/Download/OTUs/gg_otus_6oct2010/taxonomies/>). The
-\&\s-1RDP\s0 (<http://rdp.cme.msu.edu/download/release10_27_unaligned.fa.gz>) and Silva
-(http://www.arb\-silva.de/no_cache/download/archive/release_108/Exports/ <http://www.arb-silva.de/no_cache/download/archive/release_108/Exports/>)
-databases also provide many 16S rRNA sequences and Silva includes eukaryotic
-sequences. While 16S rRNA is a popular gene, datasets containing any type of gene
-could be used in the same fashion to generate simulated amplicon datasets, provided
-appropriate primers are used.
-.PP
-The >2,400 curated microbial genome sequences in the \s-1NCBI\s0 RefSeq collection
-(<ftp://ftp.ncbi.nih.gov/refseq/release/microbial/>) would also be suitable for
-producing 16S rRNA simulated datasets (using the adequate primers). However, the
-lower diversity of this database compared to the previous two makes it more
-appropriate for producing artificial microbial metagenomes. Individual genomes
-from this database are also very suitable for the simulation of single or
-double-barreled shotgun libraries. Similarly, the RefSeq database contains
-over 3,100 curated viral sequences (<ftp://ftp.ncbi.nih.gov/refseq/release/viral/>)
-which can be used to produce artificial viral metagenomes.
-.PP
-Quite a few eukaryotic organisms have been sequenced and their genome or genes
-can be the basis for simulating genomic, transcriptomic (RNA-seq) or proteomic 
-datasets. For example, you can use the human genome available at
-<ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/>, the human transcripts
-downloadable from <ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.fna.gz>
-or the human proteome at <ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz>.
-.SH "CLI EXAMPLES"
-.IX Header "CLI EXAMPLES"
-Here are a few examples that illustrate the use of Grinder in a terminal:
-.IP "1." 4
-A shotgun \s-1DNA\s0 library with a coverage of 0.1X
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-coverage_fold 0.1
-.Ve
-.IP "2." 4
-Same thing but save the result files in a specific folder and with a specific name
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-coverage_fold 0.1 \-base_name my_name \-output_dir my_dir
-.Ve
-.IP "3." 4
-A \s-1DNA\s0 shotgun library with 1000 reads
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-total_reads 1000
-.Ve
-.IP "4." 4
-A \s-1DNA\s0 shotgun library where species are distributed according to a power law
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-abundance_model powerlaw 0.1
-.Ve
-.IP "5." 4
-A \s-1DNA\s0 shotgun library with 123 species
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-diversity 123
-.Ve
-.IP "6." 4
-Two \s-1DNA\s0 shotgun libraries that have 50% of the species in common
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-num_libraries 2 \-shared_perc 50
-.Ve
-.IP "7." 4
-Two \s-1DNA\s0 shotgun libraries with no species in common and distributed according to an
-exponential rank-abundance model. Note that because the parameter value for the
-exponential model is omitted, each library uses a different randomly chosen value:
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-num_libraries 2 \-abundance_model exponential
-.Ve
-.IP "8." 4
-A \s-1DNA\s0 shotgun library where species relative abundances are manually specified
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-abundance_file my_abundances.txt
-.Ve
-.IP "9." 4
-A \s-1DNA\s0 shotgun library with Sanger reads
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-read_dist 800 \-mutation_dist linear 1 2 \-mutation_ratio 80 20
-.Ve
-.IP "10." 4
-A \s-1DNA\s0 shotgun library with first-generation 454 reads
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-read_dist 100 normal 10 \-homopolymer_dist balzer
-.Ve
-.IP "11." 4
-A paired-end \s-1DNA\s0 shotgun library, where the insert size is normally distributed
-around 2.5 kbp and has 0.2 kbp standard deviation
-.Sp
-.Vb 1
-\&   grinder \-reference_file genomes.fna \-insert_dist 2500 normal 200
-.Ve
-.IP "12." 4
-A transcriptomic dataset
-.Sp
-.Vb 1
-\&   grinder \-reference_file transcripts.fna
-.Ve
-.IP "13." 4
-A unidirectional transcriptomic dataset
-.Sp
-.Vb 1
-\&   grinder \-reference_file transcripts.fna \-unidirectional 1
-.Ve
-.Sp
-Note the use of \-unidirectional 1 to prevent reads from being taken from the reverse\-
-complement of the reference sequences.
-.IP "14." 4
-A proteomic dataset
-.Sp
-.Vb 1
-\&   grinder \-reference_file proteins.faa \-unidirectional 1
-.Ve
-.IP "15." 4
-A 16S rRNA amplicon library
-.Sp
-.Vb 1
-\&   grinder \-reference_file 16Sgenes.fna \-forward_reverse 16Sprimers.fna \-length_bias 0 \-unidirectional 1
-.Ve
-.Sp
-Note the use of \-length_bias 0 because reference sequence length should not affect
-the relative abundance of amplicons.
-.IP "16." 4
-The same amplicon library with 20% of chimeric reads (90% bimera, 10% trimera)
-.Sp
-.Vb 1
-\&   grinder \-reference_file 16Sgenes.fna \-forward_reverse 16Sprimers.fna \-length_bias 0 \-unidirectional 1 \-chimera_perc 20 \-chimera_dist 90 10
-.Ve
-.IP "17." 4
-Three 16S rRNA amplicon libraries with specified MIDs and no reference sequences
-in common
-.Sp
-.Vb 1
-\&   grinder \-reference_file 16Sgenes.fna \-forward_reverse 16Sprimers.fna \-length_bias 0 \-unidirectional 1 \-num_libraries 3 \-multiplex_ids MIDs.fna
-.Ve
-.IP "18." 4
-Reading reference sequences from the standard input, which allows you to
-decompress \s-1FASTA\s0 files on the fly:
-.Sp
-.Vb 1
-\&   zcat microbial_db.fna.gz | grinder \-reference_file \- \-total_reads 100
-.Ve
-.SH "CLI REQUIRED ARGUMENTS"
-.IX Header "CLI REQUIRED ARGUMENTS"
-.IP "\-rf <reference_file> | \-reference_file <reference_file> | \-gf <reference_file> | \-genome_file <reference_file>" 4
-.IX Item "-rf <reference_file> | -reference_file <reference_file> | -gf <reference_file> | -genome_file <reference_file>"
-\&\s-1FASTA\s0 file that contains the input reference sequences (full genomes, 16S rRNA
-genes, transcripts, proteins...) or '\-' to read them from the standard input. See the
-\&\s-1README\s0 file for examples of databases you can use and where to get them from. 
-Default: \-
-.SH "CLI OPTIONAL ARGUMENTS"
-.IX Header "CLI OPTIONAL ARGUMENTS"
-.IP "\-tr <total_reads> | \-total_reads <total_reads>" 4
-.IX Item "-tr <total_reads> | -total_reads <total_reads>"
-Number of shotgun or amplicon reads to generate for each library. Do not specify
-this if you specify the fold coverage. Default: 100
-.IP "\-cf <coverage_fold> | \-coverage_fold <coverage_fold>" 4
-.IX Item "-cf <coverage_fold> | -coverage_fold <coverage_fold>"
-Desired fold coverage of the input reference sequences (the output \s-1FASTA\s0 length
-divided by the input \s-1FASTA\s0 length). Do not specify this if you specify the number
-of reads directly.
-.IP "\-rd <read_dist>... | \-read_dist <read_dist>..." 4
-.IX Item "-rd <read_dist>... | -read_dist <read_dist>..."
-Desired shotgun or amplicon read length distribution specified as:
-   average length, distribution ('uniform' or 'normal') and standard deviation.
-.Sp
-Only the first element is required. Examples:
-.Sp
-.Vb 6
-\&  All reads exactly 101 bp long (Illumina GA 2x): 101
-\&  Uniform read distribution around 100+\-10 bp: 100 uniform 10
-\&  Reads normally distributed with an average of 800 and a standard deviation of 100
-\&    bp (Sanger reads): 800 normal 100
-\&  Reads normally distributed with an average of 450 and a standard deviation of 50
-\&    bp (454 GS\-FLX Ti): 450 normal 50
-.Ve
-.Sp
-Reference sequences smaller than the specified read length are not used. Default:
-100
-.IP "\-id <insert_dist>... | \-insert_dist <insert_dist>..." 4
-.IX Item "-id <insert_dist>... | -insert_dist <insert_dist>..."
-Create paired-end or mate-pair reads spanning the given insert length.
-Important: the insert is defined in the biological sense, i.e. its length includes
-the length of both reads and of the stretch of \s-1DNA\s0 between them:
-   0 : off,
-   or: insert size distribution in bp, in the same format as the read length
-       distribution (a typical value is 2,500 bp)
-Two distinct reads are generated whether or not the mate pair overlaps. Default:
-0
-.IP "\-mo <mate_orientation> | \-mate_orientation <mate_orientation>" 4
-.IX Item "-mo <mate_orientation> | -mate_orientation <mate_orientation>"
-When generating paired-end or mate-pair reads (see <insert_dist>), specify the
-orientation of the reads (F: forward, R: reverse):
-.Sp
-.Vb 4
-\&   FR:  \-\-\-> <\-\-\-  e.g. Sanger, Illumina paired\-end
-\&   FF:  \-\-\-> \-\-\->  e.g. 454
-\&   RF:  <\-\-\- \-\-\->  e.g. Illumina mate\-pairs
-\&   RR:  <\-\-\- <\-\-\-
-.Ve
-.Sp
-Default: \s-1FR\s0
-.IP "\-ec <exclude_chars> | \-exclude_chars <exclude_chars>" 4
-.IX Item "-ec <exclude_chars> | -exclude_chars <exclude_chars>"
-Do not create reads containing any of the specified characters (case 
-insensitive), e.g. 'N\-' to prevent reads with gaps (\-) or ambiguities (N). 
-Default: ''
-.IP "\-dc <delete_chars> | \-delete_chars <delete_chars>" 4
-.IX Item "-dc <delete_chars> | -delete_chars <delete_chars>"
-Remove the specified characters from the reference sequences (case-insensitive),
-e.g. 'N\-' to remove gaps (\-) and ambiguities (N). Default:
-.IP "\-fr <forward_reverse> | \-forward_reverse <forward_reverse>" 4
-.IX Item "-fr <forward_reverse> | -forward_reverse <forward_reverse>"
-Use \s-1DNA\s0 amplicon sequencing using a forward and reverse \s-1PCR\s0 primer sequence
-provided in a \s-1FASTA\s0 file. The reference sequences and their reverse complement
-will be searched for \s-1PCR\s0 primer matches. The primer sequences should use the
-\&\s-1IUPAC\s0 convention for degenerate residues and the reference sequences that
-do not match the specified primers are excluded. If your reference sequences are
-full genomes, it is recommended to use <copy_bias> = 1 and <length_bias> = 0 to
-generate amplicon reads. To sequence from the forward strand, set <unidirectional>
-to 1 and put the forward primer first and reverse primer second in the \s-1FASTA\s0
-file. To sequence from the reverse strand, invert the primers in the \s-1FASTA\s0 file
-and use <unidirectional> = \-1. The second primer sequence in the \s-1FASTA\s0 file is
-always optional. Example: \s-1AAACTYAAAKGAATTGRCGG\s0 and \s-1ACGGGCGGTGTGTRC\s0 for the 926F
-and 1392R primers that target the V6 to V9 region of the 16S rRNA gene.
-.IP "\-un <unidirectional> | \-unidirectional <unidirectional>" 4
-.IX Item "-un <unidirectional> | -unidirectional <unidirectional>"
-Instead of producing reads bidirectionally, from the reference strand and its
-reverse complement, proceed unidirectionally, from one strand only (forward or
-reverse). Values: 0 (off, i.e. bidirectional), 1 (forward), \-1 (reverse). Use
-<unidirectional> = 1 for strand-specific transcriptomic or proteomic datasets.
-Default: 0
-.IP "\-lb <length_bias> | \-length_bias <length_bias>" 4
-.IX Item "-lb <length_bias> | -length_bias <length_bias>"
-In shotgun libraries, sample reference sequences proportionally to their length.
-For example, in simulated microbial datasets, this means that at the same
-relative abundance, larger genomes contribute more reads than smaller genomes
-(and all genomes have the same fold coverage).
-0 = no, 1 = yes. Default: 1
-.IP "\-cb <copy_bias> | \-copy_bias <copy_bias>" 4
-.IX Item "-cb <copy_bias> | -copy_bias <copy_bias>"
-In amplicon libraries where full genomes are used as input, sample species
-proportionally to the number of copies of the target gene: at equal relative
-abundance, genomes that have multiple copies of the target gene contribute more
-amplicon reads than genomes that have a single copy. 0 = no, 1 = yes. Default:
-1
-.IP "\-md <mutation_dist>... | \-mutation_dist <mutation_dist>..." 4
-.IX Item "-md <mutation_dist>... | -mutation_dist <mutation_dist>..."
-Introduce sequencing errors in the reads, under the form of mutations
-(substitutions, insertions and deletions) at positions that follow a specified
-distribution (with replacement): model (uniform, linear, poly4), model parameters.
-For example, for a uniform 0.1% error rate, use: uniform 0.1. To simulate Sanger
-errors, use a linear model where the error rate is 1% at the 5' end of reads and
-2% at the 3' end: linear 1 2. To model Illumina errors using the 4th degree
-polynome 3e\-3 + 3.3e\-8 * i^4 (Korbel et al 2009), use: poly4 3e\-3 3.3e\-8.
-Use the <mutation_ratio> option to alter how many of these mutations are
-substitutions or indels. Default: uniform 0 0
-.IP "\-mr <mutation_ratio>... | \-mutation_ratio <mutation_ratio>..." 4
-.IX Item "-mr <mutation_ratio>... | -mutation_ratio <mutation_ratio>..."
-Indicate the percentage of substitutions and the percentage of indels (insertions
-and deletions). For example, use '80 20' (4 substitutions for each indel) for
-Sanger reads. Note that this parameter has no effect unless you specify the
-<mutation_dist> option. Default: 80 20
-.IP "\-hd <homopolymer_dist> | \-homopolymer_dist <homopolymer_dist>" 4
-.IX Item "-hd <homopolymer_dist> | -homopolymer_dist <homopolymer_dist>"
-Introduce sequencing errors in the reads under the form of homopolymeric
-stretches (e.g. \s-1AAA\s0, \s-1CCCCC\s0) using a specified model where the homopolymer length
-follows a normal distribution N(mean, standard deviation) that is a function of
-the homopolymer length n:
-.Sp
-.Vb 3
-\&  Margulies: N(n, 0.15 * n)              ,  Margulies et al. 2005.
-\&  Richter  : N(n, 0.15 * sqrt(n))        ,  Richter et al. 2008.
-\&  Balzer   : N(n, 0.03494 + n * 0.06856) ,  Balzer et al. 2010.
-.Ve
-.Sp
-Default: 0
-.IP "\-cp <chimera_perc> | \-chimera_perc <chimera_perc>" 4
-.IX Item "-cp <chimera_perc> | -chimera_perc <chimera_perc>"
-Specify the percent of reads in amplicon libraries that should be chimeric
-sequences. The 'reference' field in the description of chimeric reads will
-contain the \s-1ID\s0 of all the reference sequences forming the chimeric template. A
-typical value is 10%. Default: 0 %
-.IP "\-cd <chimera_dist>... | \-chimera_dist <chimera_dist>..." 4
-.IX Item "-cd <chimera_dist>... | -chimera_dist <chimera_dist>..."
-Specify the distribution of chimeras: bimeras, trimeras, quadrameras and
-multimeras of higher order. The default is the average values from Quince et al.
-2011: '314 38 1', which corresponds to 89% of bimeras, 11% of trimeras and 0.3%
-of quadrameras. Note that this option only takes effect when you request the
-generation of chimeras with the <chimera_perc> option. Default: 314 38 1
-.IP "\-ck <chimera_kmer> | \-chimera_kmer <chimera_kmer>" 4
-.IX Item "-ck <chimera_kmer> | -chimera_kmer <chimera_kmer>"
-Activate a method to form chimeras by picking breakpoints at places where k\-mers
-are shared between sequences. <chimera_kmer> represents k, the length of the
-k\-mers (in bp). The longer the kmer, the more similar the sequences have to be
-to be eligible to form chimeras. The more frequent a k\-mer is in the pool of
-reference sequences (taking into account their relative abundance), the more
-often this k\-mer will be chosen. For example, \s-1CHSIM\s0 (Edgar et al. 2011) uses this
-method with a k\-mer length of 10 bp. If you do not want to use k\-mer information
-to form chimeras, use 0, which will result in the reference sequences and
-breakpoints to be taken randomly on the \*(L"aligned\*(R" reference sequences. Note that
-this option only takes effect when you request the generation of chimeras with
-the <chimera_perc> option. Also, this option is quite memory intensive, so you
-should probably limit yourself to a relatively small number of reference sequences
-if you want to use it. Default: 10 bp
-.IP "\-af <abundance_file> | \-abundance_file <abundance_file>" 4
-.IX Item "-af <abundance_file> | -abundance_file <abundance_file>"
-Specify the relative abundance of the reference sequences manually in an input
-file. Each line of the file should contain a sequence name and its relative
-abundance (%), e.g. 'seqABC 82.1' or 'seqABC 82.1 10.2' if you are specifying two
-different libraries.
-.IP "\-am <abundance_model>... | \-abundance_model <abundance_model>..." 4
-.IX Item "-am <abundance_model>... | -abundance_model <abundance_model>..."
-Relative abundance model for the input reference sequences: uniform, linear, powerlaw,
-logarithmic or exponential. The uniform and linear models do not require a
-parameter, but the other models take a parameter in the range [0, infinity). If
-this parameter is not specified, then it is randomly chosen. Examples:
-.Sp
-.Vb 3
-\&  uniform distribution: uniform
-\&  powerlaw distribution with parameter 0.1: powerlaw 0.1
-\&  exponential distribution with automatically chosen parameter: exponential
-.Ve
-.Sp
-Default: uniform 1
-.IP "\-nl <num_libraries> | \-num_libraries <num_libraries>" 4
-.IX Item "-nl <num_libraries> | -num_libraries <num_libraries>"
-Number of independent libraries to create. Specify how diverse and similar they
-should be with <diversity>, <shared_perc> and <permuted_perc>. Assign them
-different \s-1MID\s0 tags with <multiplex_ids>. Default: 1
-.IP "\-mi <multiplex_ids> | \-multiplex_ids <multiplex_ids>" 4
-.IX Item "-mi <multiplex_ids> | -multiplex_ids <multiplex_ids>"
-Specify an optional \s-1FASTA\s0 file that contains multiplex sequence identifiers
-(a.k.a MIDs or barcodes) to add to the sequences (one sequence per library). The MIDs
-are included in the length specified with the \-read_dist option.
-.IP "\-di <diversity>... | \-diversity <diversity>..." 4
-.IX Item "-di <diversity>... | -diversity <diversity>..."
-Richness, or number of reference sequences to include in the shotgun libraries.
-Use 0 for the maximum diversity possible (based on the number of reference sequences
-available). Provide one value to make all libraries have the same diversity, or
-one diversity value per library otherwise. Default: 0
-.IP "\-sp <shared_perc> | \-shared_perc <shared_perc>" 4
-.IX Item "-sp <shared_perc> | -shared_perc <shared_perc>"
-For multiple libraries, percent of reference sequences they should have in common
-(relative to the diversity of the least diverse library). Default: 0 %
-.IP "\-pp <permuted_perc> | \-permuted_perc <permuted_perc>" 4
-.IX Item "-pp <permuted_perc> | -permuted_perc <permuted_perc>"
-For multiple libraries, percent of the most-abundant reference sequences to permute
-in rank-abundance. Default: 0 %
-.IP "\-rs <random_seed> | \-random_seed <random_seed>" 4
-.IX Item "-rs <random_seed> | -random_seed <random_seed>"
-Seed number to use for the pseudo-random number generator.
-.IP "\-dt <desc_track> | \-desc_track <desc_track>" 4
-.IX Item "-dt <desc_track> | -desc_track <desc_track>"
-Track read information (reference sequence, position, errors, ...) by writing
-it in the read description. Default: 1
-.IP "\-ql <qual_levels>... | \-qual_levels <qual_levels>..." 4
-.IX Item "-ql <qual_levels>... | -qual_levels <qual_levels>..."
-Generate basic quality scores for the simulated reads. Good residues are given a
-specified good score (e.g. 30) and residues that are the result of an insertion
-or substitution are given a specified bad score (e.g. 10). Specify first the
-good score and then the bad score on the command-line, e.g.: 30 10. Default:
-.IP "\-fq <fastq_output> | \-fastq_output <fastq_output>" 4
-.IX Item "-fq <fastq_output> | -fastq_output <fastq_output>"
-Write the generated reads in \s-1FASTQ\s0 format (Sanger variant) instead of \s-1FASTA\s0 and
-\&\s-1QUAL\s0. <qual_levels> need to be specified for this option to be effective. 
-Default: 0
-.IP "\-bn <base_name> | \-base_name <base_name>" 4
-.IX Item "-bn <base_name> | -base_name <base_name>"
-Prefix of the output files. Default: grinder
-.IP "\-od <output_dir> | \-output_dir <output_dir>" 4
-.IX Item "-od <output_dir> | -output_dir <output_dir>"
-Directory where the results should be written. This folder will be created if
-needed. Default: .
-.IP "\-pf <profile_file> | \-profile_file <profile_file>" 4
-.IX Item "-pf <profile_file> | -profile_file <profile_file>"
-A file that contains Grinder arguments. This is useful if you use many options
-or often use the same options. Lines with comments (#) are ignored. Consider the
-profile file, 'simple_profile.txt':
-.Sp
-.Vb 3
-\&  # A simple Grinder profile
-\&  \-read_dist 105 normal 12
-\&  \-total_reads 1000
-.Ve
-.Sp
-Running: grinder \-reference_file viral_genomes.fa \-profile_file simple_profile.txt
-.Sp
-Translates into: grinder \-reference_file viral_genomes.fa \-read_dist 105 normal 12 \-total_reads 1000
-.Sp
-Note that the arguments specified in the profile should not be specified again on the command line.
-.SH "CLI OUTPUT"
-.IX Header "CLI OUTPUT"
-For each shotgun or amplicon read library requested, the following files are
-generated:
-.IP "\(bu" 4
-A rank-abundance file, tab-delimited, that shows the relative abundance of the
-different reference sequences
-.IP "\(bu" 4
-A file containing the read sequences in \s-1FASTA\s0 format. The read headers
-contain information necessary to track from which reference sequence each read
-was taken and what errors it contains. This file is not generated if <fastq_output>
-option was provided.
-.IP "\(bu" 4
-If the <qual_levels> option was specified, a file containing the quality scores
-of the reads (in \s-1QUAL\s0 format).
-.IP "\(bu" 4
-If the <fastq_output> option was provided, a file containing the read sequences
-in \s-1FASTQ\s0 format.
-.SH "API EXAMPLES"
-.IX Header "API EXAMPLES"
-The Grinder \s-1API\s0 allows you to conveniently use Grinder within Perl scripts. Here is
-a synopsis:
-.PP
-.Vb 1
-\&  use Grinder;
-\&
-\&  # Set up a new factory (see the OPTIONS section for a complete list of parameters)
-\&  my $factory = Grinder\->new( \-reference_file => \*(Aqgenomes.fna\*(Aq );
-\&
-\&  # Process all shotgun libraries requested
-\&  while ( my $struct = $factory\->next_lib ) {
-\&
-\&    # The ID and abundance of the 3rd most abundant genome in this community
-\&    my $id = $struct\->{ids}\->[2];
-\&    my $ab = $struct\->{abs}\->[2];
-\&
-\&    # Create shotgun reads
-\&    while ( my $read = $factory\->next_read) {
-\&
-\&      # The read is a Bioperl sequence object with these properties:
-\&      my $read_id     = $read\->id;     # read ID given by Grinder
-\&      my $read_seq    = $read\->seq;    # nucleotide sequence
-\&      my $read_mid    = $read\->mid;    # MID or tag attached to the read
-\&      my $read_errors = $read\->errors; # errors that the read contains
-\& 
-\&      # Where was the read taken from? The reference sequence refers to the
-\&      # database sequence for shotgun libraries, amplicon obtained from the
-\&      # database sequence, or could even be a chimeric sequence
-\&      my $ref_id     = $read\->reference\->id; # ID of the reference sequence
-\&      my $ref_start  = $read\->start;         # start of the read on the reference
-\&      my $ref_end    = $read\->end;           # end of the read on the reference
-\&      my $ref_strand = $read\->strand;        # strand of the reference
-\&      
-\&    }
-\&  }
-\&
-\&  # Similarly, for shotgun mate pairs
-\&  my $factory = Grinder\->new( \-reference_file => \*(Aqgenomes.fna\*(Aq,
-\&                              \-insert_dist    => 250            );
-\&  while ( $factory\->next_lib ) {
-\&    while ( my $read = $factory\->next_read ) {
-\&      # The first read is the first mate of the mate pair
-\&      # The second read is the second mate of the mate pair
-\&      # The third read is the first mate of the next mate pair
-\&      # ...
-\&    }
-\&  }
-\&
-\&  # To generate an amplicon library
-\&  my $factory = Grinder\->new( \-reference_file  => \*(Aqgenomes.fna\*(Aq,
-\&                              \-forward_reverse => \*(Aq16Sgenes.fna\*(Aq,
-\&                              \-length_bias     => 0,
-\&                              \-unidirectional  => 1              );
-\&  while ( $factory\->next_lib ) {
-\&    while ( my $read = $factory\->next_read) {
-\&      # ...
-\&    }
-\&  }
-.Ve
-.SH "API METHODS"
-.IX Header "API METHODS"
-The rest of the documentation details the available Grinder \s-1API\s0 methods.
-.SS "new"
-.IX Subsection "new"
-Title   : new
-.PP
-Function: Create a new Grinder factory initialized with the passed arguments.
-          Available parameters described in the \s-1OPTIONS\s0 section.
-.PP
-Usage   : my \f(CW$factory\fR = Grinder\->new( \-reference_file => 'genomes.fna' );
-.PP
-Returns : a new Grinder object
-.SS "next_lib"
-.IX Subsection "next_lib"
-Title   : next_lib
-.PP
-Function: Go to the next shotgun library to process.
-.PP
-Usage   : my \f(CW$struct\fR = \f(CW$factory\fR\->next_lib;
-.PP
-Returns : Community structure to be used for this library, where \f(CW$struct\fR\->{ids}
-          is an array reference containing the IDs of the genomes making up the
-          community (sorted by decreasing relative abundance) and \f(CW$struct\fR\->{abs}
-          is an array reference of the genome abundances (in the same order as
-          the IDs).
-.SS "next_read"
-.IX Subsection "next_read"
-Title   : next_read
-.PP
-Function: Create an amplicon or shotgun read for the current library.
-.PP
-Usage   : my \f(CW$read\fR  = \f(CW$factory\fR\->next_read; # for single read
-          my \f(CW$mate1\fR = \f(CW$factory\fR\->next_read; # for mate pairs
-          my \f(CW$mate2\fR = \f(CW$factory\fR\->next_read;
-.PP
-Returns : A sequence represented as a Bio::Seq::SimulatedRead object
-.SS "get_random_seed"
-.IX Subsection "get_random_seed"
-Title   : get_random_seed
-.PP
-Function: Return the number used to seed the pseudo-random number generator
-.PP
-Usage   : my \f(CW$seed\fR = \f(CW$factory\fR\->get_random_seed;
-.PP
-Returns : seed number
-.SH "COPYRIGHT"
-.IX Header "COPYRIGHT"
-Copyright 2009\-2012 Florent \s-1ANGLY\s0 <florent.angly at gmail.com>
-.PP
-Grinder is free software: you can redistribute it and/or modify
-it under the terms of the \s-1GNU\s0 General Public License (\s-1GPL\s0) as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-Grinder is distributed in the hope that it will be useful,
-but \s-1WITHOUT\s0 \s-1ANY\s0 \s-1WARRANTY\s0; without even the implied warranty of
-\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS\s0 \s-1FOR\s0 A \s-1PARTICULAR\s0 \s-1PURPOSE\s0.  See the
-\&\s-1GNU\s0 General Public License for more details.
-You should have received a copy of the \s-1GNU\s0 General Public License
-along with Grinder.  If not, see <http://www.gnu.org/licenses/>.
-.SH "BUGS"
-.IX Header "BUGS"
-All complex software has bugs lurking in it, and this program is no exception.
-If you find a bug, please report it on the SourceForge Tracker for Grinder:
-<http://sourceforge.net/tracker/?group_id=244196&atid=1124737>
-.PP
-Bug reports, suggestions and patches are welcome. Grinder's code is developed
-on Sourceforge (<http://sourceforge.net/scm/?type=git&group_id=244196>) and is
-under Git revision control. To get started with a patch, do:
-.PP
-.Vb 1
-\&   git clone git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-.Ve
diff --git a/script/grinder b/script/grinder
index 910d02d..4097b14 100755
--- a/script/grinder
+++ b/script/grinder
@@ -1,6 +1,6 @@
 #! /usr/bin/env perl
 
-# This file is part of the Grinder package, copyright 2009-2012
+# This file is part of the Grinder package, copyright 2009,2010,2011,2012
 # Florent Angly <florent.angly at gmail.com>, under the GPLv3 license
 
 # Grinder is a program to create artificial random shotgun and amplicon sequence
diff --git a/script/grinder.pod b/script/grinder.pod
deleted file mode 100755
index 53fccfb..0000000
--- a/script/grinder.pod
+++ /dev/null
@@ -1,854 +0,0 @@
-# This file was automatically generated by Getopt::Euclid. Do not edit it.
-
-=head1 NAME
-
-grinder - A versatile omics shotgun and amplicon sequencing read simulator
-
-=head1 DESCRIPTION
-
-Grinder is a versatile program to create random shotgun and amplicon sequence
-libraries based on DNA, RNA or proteic reference sequences provided in a FASTA
-file.
-
-Grinder can produce genomic, metagenomic, transcriptomic, metatranscriptomic,
-proteomic, metaproteomic shotgun and amplicon datasets from current sequencing
-technologies such as Sanger, 454, Illumina. These simulated datasets can be used
-to test the accuracy of bioinformatic tools under specific hypotheses, e.g. with
-or without sequencing errors, or with low or high community diversity. Grinder
-may also be used to help decide between alternative sequencing methods for a
-sequence-based project, e.g. should the library be paired-end or not, how many
-reads should be sequenced.
-
-Grinder features include:
-
-=over
-
-=item *
-
-shotgun or amplicon read libraries
-
-=item *
-
-omics support to generate genomic, transcriptomic, proteomic,
-metagenomic, metatranscriptomic or metaproteomic datasets
-
-=item *
-
-arbitrary read length distribution and number of reads
-
-=item *
-
-simulation of PCR and sequencing errors (chimeras, point mutations, homopolymers)
-
-=item *
-
-support for paired-end (mate pair) datasets
-
-=item *
-
-specific rank-abundance settings or manually given abundance for each genome, gene or protein
-
-=item *
-
-creation of datasets with a given richness (alpha diversity)
-
-=item *
-
-independent datasets can share a variable number of genomes (beta diversity)
-
-=item *
-
-modeling of the bias created by varying genome lengths or gene copy number
-
-=item *
-
-profile mechanism to store preferred options
-
-=item *
-
-available to biologists or power users through multiple interfaces: GUI, CLI and API
-
-=back
-
-Briefly, given a FASTA file containing reference sequence (genomes, genes,
-transcripts or proteins), Grinder performs the following steps:
-
-=over
-
-=item 1.
-
-Read the reference sequences, and for amplicon datasets, extracts full-length
-reference PCR amplicons using the provided degenerate PCR primers.
-
-=item 2.
-
-Determine the community structure based on the provided alpha diversity (number
-of reference sequences in the library), beta diversity (number of reference
-sequences in common between several independent libraries) and specified rank-
-abundance model.
-
-=item 3.
-
-Take shotgun reads from the reference sequences or amplicon reads from the full-
-length reference PCR amplicons. The reads may be paired-end reads when an insert
-size distribution is specified. The length of the reads depends on the provided
-read length distribution and their abundance depends on the relative abundance
-in the community structure. Genome length may also bias the number of reads to
-take for shotgun datasets at this step. Similarly, for amplicon datasets, the
-number of copies of the target gene in the reference genomes may bias the number
-of reads to take.
-
-=item 4.
-
-Alter reads by inserting sequencing errors (indels, substitutions and homopolymer
-errors) following a position-specific model to simulate reads created by current
-sequencing technologies (Sanger, 454, Illumina). Write the reads and their
-quality scores in FASTA, QUAL and FASTQ files.
-
-=back
-
-=head1 CITATION
-
-If you use Grinder in your research, please cite:
-
-   Angly FE, Willner D, Rohwer F, Hugenholtz P, Tyson GW (2011) Grinder: a
-   versatile sequence simulator for environmental shotgun and amplicon datasets
-   
-In review...
-
-   Angly FE, Willner D, Prieto-Davó A, Edwards RA, Schmieder R, et al. (2009) The
-   GAAS Metagenomic Tool and Its Estimations of Viral and Microbial Average Genome
-   Size in Four Major Biomes. PLoS Comput Biol 5(12): e1000593.
-   
-Available from L<http://dx.doi.org/10.1371/journal.pcbi.1000593>.
-
-=head1 VERSION
-
-This document refers to grinder version 0.4.5 
-
-=head1 AUTHOR
-
-Florent Angly <florent.angly at gmail.com>
-
-=head1 INSTALLATION
-
-=head2 Dependencies
-
-You need to install these dependencies first:
-
-=over
-
-=item *
-
-Perl
-
-L<http://www.perl.com/download.csp>
-
-=item *
-
-make
-
-Many systems have make installed by default. If your system does not, you should
-install the implementation of make of your choice, e.g. GNU make: L<http://www.gnu.org/s/make/>
-
-=back
-
-The following CPAN Perl modules are dependencies that will be installed automatically
-for you:
-
-=over
-
-=item *
-
-Bio::SeqIO, Bio::Root::Root, Bio::Seq::SimulatedRead
-
-Part of the Bioperl package. Bio::Seq::SimulatedRead has not been released
-yet and is therefore included here.
-
-=item *
-
-Getopt::Euclid (>= 0.2.8)
-
-=item *
-
-Math::Random::MT (>= 1.13)
-
-=back
-
-=head2 Procedure
-
-To install Grinder globally on your system, run the following commands in a
-terminal or command prompt:
-
-On Linux, Unix, MacOS:
-
-   perl Makefile.PL
-   make
-
-And finally, with administrator privileges:
-
-   make install
-
-On Windows, run the same commands but with nmake instead of make.
-
-=head2 No administrator privileges?
-
-If you do not have administrator privileges, Grinder needs to be installed in
-your home directory.
-
-First, follow the instructions to install local::lib
-at L<http://search.cpan.org/~apeiron/local-lib-1.008004/lib/local/lib.pm#The_bootstrapping_technique>. After local::lib is installed, every Perl
-module that you install manually or through the CPAN command-line application
-will be installed in your home directory.
-
-Then, install Grinder by following the instructions detailed in the "Procedure"
-section.
-
-=head1 RUNNING GRINDER
-
-After installation, you can run Grinder using a command-line interface (CLI), 
-an application programming interface (API) or a graphical user interface (GUI)
-in Galaxy.
-
-To get the usage of the CLI, type:
-
-  grinder --help
-
-More information, including the documentation of the Grinder API, which allows
-you to run Grinder from within other Perl programs, is available by typing:
-
-  perldoc Grinder
-
-To run the GUI, refer to the Galaxy documentation at L<http://wiki.g2.bx.psu.edu/FrontPage>.
-
-The 'utils' folder included in the Grinder package contains some utilities:
-
-=over
-
-=item average genome size:
-
-This calculates the average genome size (in bp) of a simulated random library
-produced by Grinder.
-
-=item change_paired_read_orientation:
-
-This reverses the orientation of each second mate-pair read (ID ending in /2)
-in a FASTA file.
-
-=back
-
-=head1 REFERENCE SEQUENCE DATABASE
-
-A variety of FASTA databases can be used as input for Grinder. For example, the
-GreenGenes database (L<http://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/Isolated_named_strains_16S_aligned.fasta>)
-contains over 180,000 16S rRNA clone sequences from various species which would
-be appropriate to produce a 16S rRNA amplicon dataset. A set of over 41,000 OTU
-representative sequences and their affiliation in seven different taxonomic
-systems can also be used for the same purpose (L<http://greengenes.lbl.gov/Download/OTUs/gg_otus_6oct2010/rep_set/gg_97_otus_6oct2010.fasta>
-and L<http://greengenes.lbl.gov/Download/OTUs/gg_otus_6oct2010/taxonomies/>). The
-RDP (L<http://rdp.cme.msu.edu/download/release10_27_unaligned.fa.gz>) and Silva
-(L<http://www.arb-silva.de/no_cache/download/archive/release_108/Exports/>)
-databases also provide many 16S rRNA sequences and Silva includes eukaryotic
-sequences. While 16S rRNA is a popular gene, datasets containing any type of gene
-could be used in the same fashion to generate simulated amplicon datasets, provided
-appropriate primers are used.
-
-The >2,400 curated microbial genome sequences in the NCBI RefSeq collection
-(L<ftp://ftp.ncbi.nih.gov/refseq/release/microbial/>) would also be suitable for
-producing 16S rRNA simulated datasets (using the adequate primers). However, the
-lower diversity of this database compared to the previous two makes it more
-appropriate for producing artificial microbial metagenomes. Individual genomes
-from this database are also very suitable for the simulation of single or
-double-barreled shotgun libraries. Similarly, the RefSeq database contains
-over 3,100 curated viral sequences (L<ftp://ftp.ncbi.nih.gov/refseq/release/viral/>)
-which can be used to produce artificial viral metagenomes.
-
-Quite a few eukaryotic organisms have been sequenced and their genome or genes
-can be the basis for simulating genomic, transcriptomic (RNA-seq) or proteomic 
-datasets. For example, you can use the human genome available at
-L<ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/>, the human transcripts
-downloadable from L<ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.fna.gz>
-or the human proteome at L<ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz>.
-
-=head1 CLI EXAMPLES
-
-Here are a few examples that illustrate the use of Grinder in a terminal:
-
-=over
-
-=item 1.
-
-A shotgun DNA library with a coverage of 0.1X
-
-   grinder -reference_file genomes.fna -coverage_fold 0.1
-
-=item 2.
-
-Same thing but save the result files in a specific folder and with a specific name
-
-   grinder -reference_file genomes.fna -coverage_fold 0.1 -base_name my_name -output_dir my_dir
-
-=item 3.
-
-A DNA shotgun library with 1000 reads
-
-   grinder -reference_file genomes.fna -total_reads 1000
-
-=item 4.
-
-A DNA shotgun library where species are distributed according to a power law
-
-   grinder -reference_file genomes.fna -abundance_model powerlaw 0.1
-
-=item 5.
-
-A DNA shotgun library with 123 species
-
-   grinder -reference_file genomes.fna -diversity 123
-
-=item 6.
-
-Two DNA shotgun libraries that have 50% of the species in common
-
-   grinder -reference_file genomes.fna -num_libraries 2 -shared_perc 50
-
-=item 7.
-
-Two DNA shotgun libraries with no species in common and distributed according to an
-exponential rank-abundance model. Note that because the parameter value for the
-exponential model is omitted, each library uses a different randomly chosen value:
-
-   grinder -reference_file genomes.fna -num_libraries 2 -abundance_model exponential
-
-=item 8.
-
-A DNA shotgun library where species relative abundances are manually specified
-
-   grinder -reference_file genomes.fna -abundance_file my_abundances.txt
-
-=item 9.
-
-A DNA shotgun library with Sanger reads
-
-   grinder -reference_file genomes.fna -read_dist 800 -mutation_dist linear 1 2 -mutation_ratio 80 20
-
-=item 10.
-
-A DNA shotgun library with first-generation 454 reads
-
-   grinder -reference_file genomes.fna -read_dist 100 normal 10 -homopolymer_dist balzer
-
-=item 11.
-
-A paired-end DNA shotgun library, where the insert size is normally distributed
-around 2.5 kbp and has 0.2 kbp standard deviation
-
-   grinder -reference_file genomes.fna -insert_dist 2500 normal 200
-
-=item 12.
-
-A transcriptomic dataset
-
-   grinder -reference_file transcripts.fna
-
-=item 13.
-
-A unidirectional transcriptomic dataset
-
-   grinder -reference_file transcripts.fna -unidirectional 1
-
-Note the use of -unidirectional 1 to prevent reads from being taken from the reverse-
-complement of the reference sequences.
-
-=item 14.
-
-A proteomic dataset
-
-   grinder -reference_file proteins.faa -unidirectional 1
-
-=item 15.
-
-A 16S rRNA amplicon library
-
-   grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1
-
-Note the use of -length_bias 0 because reference sequence length should not affect
-the relative abundance of amplicons.
-
-=item 16.
-
-The same amplicon library with 20% of chimeric reads (90% bimera, 10% trimera)
-
-   grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1 -chimera_perc 20 -chimera_dist 90 10
-
-=item 17.
-
-Three 16S rRNA amplicon libraries with specified MIDs and no reference sequences
-in common
-
-   grinder -reference_file 16Sgenes.fna -forward_reverse 16Sprimers.fna -length_bias 0 -unidirectional 1 -num_libraries 3 -multiplex_ids MIDs.fna
-
-=item 18.
-
-Reading reference sequences from the standard input, which allows you to
-decompress FASTA files on the fly:
-
-   zcat microbial_db.fna.gz | grinder -reference_file - -total_reads 100
-
-=back
-
-=head1 CLI REQUIRED ARGUMENTS
-
-=over
-
-=item -rf <reference_file> | -reference_file <reference_file> | -gf <reference_file> | -genome_file <reference_file>
-
-FASTA file that contains the input reference sequences (full genomes, 16S rRNA
-genes, transcripts, proteins...) or '-' to read them from the standard input. See the
-README file for examples of databases you can use and where to get them from. 
-Default: -
-
-=back
-
-
-
-=head1 CLI OPTIONAL ARGUMENTS
-
-=over
-
-=item -tr <total_reads> | -total_reads <total_reads>
-
-Number of shotgun or amplicon reads to generate for each library. Do not specify
-this if you specify the fold coverage. Default: 100
-
-=item -cf <coverage_fold> | -coverage_fold <coverage_fold>
-
-Desired fold coverage of the input reference sequences (the output FASTA length
-divided by the input FASTA length). Do not specify this if you specify the number
-of reads directly.
-
-=item -rd <read_dist>... | -read_dist <read_dist>...
-
-Desired shotgun or amplicon read length distribution specified as:
-   average length, distribution ('uniform' or 'normal') and standard deviation.
-
-Only the first element is required. Examples:
-
-  All reads exactly 101 bp long (Illumina GA 2x): 101
-  Uniform read distribution around 100+-10 bp: 100 uniform 10
-  Reads normally distributed with an average of 800 and a standard deviation of 100
-    bp (Sanger reads): 800 normal 100
-  Reads normally distributed with an average of 450 and a standard deviation of 50
-    bp (454 GS-FLX Ti): 450 normal 50
-
-Reference sequences smaller than the specified read length are not used. Default:
-100
-
-=item -id <insert_dist>... | -insert_dist <insert_dist>...
-
-Create paired-end or mate-pair reads spanning the given insert length.
-Important: the insert is defined in the biological sense, i.e. its length includes
-the length of both reads and of the stretch of DNA between them:
-   0 : off,
-   or: insert size distribution in bp, in the same format as the read length
-       distribution (a typical value is 2,500 bp)
-Two distinct reads are generated whether or not the mate pair overlaps. Default:
-0
-
-=item -mo <mate_orientation> | -mate_orientation <mate_orientation>
-
-When generating paired-end or mate-pair reads (see <insert_dist>), specify the
-orientation of the reads (F: forward, R: reverse):
-
-   FR:  ---> <---  e.g. Sanger, Illumina paired-end
-   FF:  ---> --->  e.g. 454
-   RF:  <--- --->  e.g. Illumina mate-pairs
-   RR:  <--- <---
-
-Default: FR
-
-=item -ec <exclude_chars> | -exclude_chars <exclude_chars>
-
-Do not create reads containing any of the specified characters (case 
-insensitive), e.g. 'N-' to prevent reads with gaps (-) or ambiguities (N). 
-Default: ''
-
-=item -dc <delete_chars> | -delete_chars <delete_chars>
-
-Remove the specified characters from the reference sequences (case-insensitive),
-e.g. 'N-' to remove gaps (-) and ambiguities (N). Default: 
-
-=item -fr <forward_reverse> | -forward_reverse <forward_reverse>
-
-Use DNA amplicon sequencing using a forward and reverse PCR primer sequence
-provided in a FASTA file. The reference sequences and their reverse complement
-will be searched for PCR primer matches. The primer sequences should use the
-IUPAC convention for degenerate residues and the reference sequences that
-do not match the specified primers are excluded. If your reference sequences are
-full genomes, it is recommended to use <copy_bias> = 1 and <length_bias> = 0 to
-generate amplicon reads. To sequence from the forward strand, set <unidirectional>
-to 1 and put the forward primer first and reverse primer second in the FASTA
-file. To sequence from the reverse strand, invert the primers in the FASTA file
-and use <unidirectional> = -1. The second primer sequence in the FASTA file is
-always optional. Example: AAACTYAAAKGAATTGRCGG and ACGGGCGGTGTGTRC for the 926F
-and 1392R primers that target the V6 to V9 region of the 16S rRNA gene.
-
-=item -un <unidirectional> | -unidirectional <unidirectional>
-
-Instead of producing reads bidirectionally, from the reference strand and its
-reverse complement, proceed unidirectionally, from one strand only (forward or
-reverse). Values: 0 (off, i.e. bidirectional), 1 (forward), -1 (reverse). Use
-<unidirectional> = 1 for strand-specific transcriptomic or proteomic datasets.
-Default: 0
-
-=item -lb <length_bias> | -length_bias <length_bias>
-
-In shotgun libraries, sample reference sequences proportionally to their length.
-For example, in simulated microbial datasets, this means that at the same
-relative abundance, larger genomes contribute more reads than smaller genomes
-(and all genomes have the same fold coverage).
-0 = no, 1 = yes. Default: 1
-
-=item -cb <copy_bias> | -copy_bias <copy_bias>
-
-In amplicon libraries where full genomes are used as input, sample species
-proportionally to the number of copies of the target gene: at equal relative
-abundance, genomes that have multiple copies of the target gene contribute more
-amplicon reads than genomes that have a single copy. 0 = no, 1 = yes. Default:
-1
-
-=item -md <mutation_dist>... | -mutation_dist <mutation_dist>...
-
-Introduce sequencing errors in the reads, under the form of mutations
-(substitutions, insertions and deletions) at positions that follow a specified
-distribution (with replacement): model (uniform, linear, poly4), model parameters.
-For example, for a uniform 0.1% error rate, use: uniform 0.1. To simulate Sanger
-errors, use a linear model where the error rate is 1% at the 5' end of reads and
-2% at the 3' end: linear 1 2. To model Illumina errors using the 4th degree
-polynomial 3e-3 + 3.3e-8 * i^4 (Korbel et al. 2009), use: poly4 3e-3 3.3e-8.
-Use the <mutation_ratio> option to alter how many of these mutations are
-substitutions or indels. Default: uniform 0 0
-
-=item -mr <mutation_ratio>... | -mutation_ratio <mutation_ratio>...
-
-Indicate the percentage of substitutions and the percentage of indels
-(insertions and deletions). For example, use '80 20' (4 substitutions for each
-indel) for Sanger reads. Note that this parameter has no effect unless you
-specify the <mutation_dist> option. Default: 80 20
-
-=item -hd <homopolymer_dist> | -homopolymer_dist <homopolymer_dist>
-
-Introduce sequencing errors in the reads under the form of homopolymeric
-stretches (e.g. AAA, CCCCC) using a specified model where the homopolymer length
-follows a normal distribution N(mean, standard deviation) that is a function of
-the homopolymer length n:
-
-  Margulies: N(n, 0.15 * n)              ,  Margulies et al. 2005.
-  Richter  : N(n, 0.15 * sqrt(n))        ,  Richter et al. 2008.
-  Balzer   : N(n, 0.03494 + n * 0.06856) ,  Balzer et al. 2010.
-
-Default: 0
-
-=item -cp <chimera_perc> | -chimera_perc <chimera_perc>
-
-Specify the percent of reads in amplicon libraries that should be chimeric
-sequences. The 'reference' field in the description of chimeric reads will
-contain the ID of all the reference sequences forming the chimeric template. A
-typical value is 10%. Default: 0 %
-
-=item -cd <chimera_dist>... | -chimera_dist <chimera_dist>...
-
-Specify the distribution of chimeras: bimeras, trimeras, quadrameras and
-multimeras of higher order. The default is the average values from Quince et al.
-2011: '314 38 1', which corresponds to 89% of bimeras, 11% of trimeras and 0.3%
-of quadrameras. Note that this option only takes effect when you request the
-generation of chimeras with the <chimera_perc> option. Default: 314 38 1
-
-=item -ck <chimera_kmer> | -chimera_kmer <chimera_kmer>
-
-Activate a method to form chimeras by picking breakpoints at places where k-mers
-are shared between sequences. <chimera_kmer> represents k, the length of the
-k-mers (in bp). The longer the kmer, the more similar the sequences have to be
-to be eligible to form chimeras. The more frequent a k-mer is in the pool of
-reference sequences (taking into account their relative abundance), the more
-often this k-mer will be chosen. For example, CHSIM (Edgar et al. 2011) uses this
-method with a k-mer length of 10 bp. If you do not want to use k-mer information
-to form chimeras, use 0, which will result in the reference sequences and
-breakpoints to be taken randomly on the "aligned" reference sequences. Note that
-this option only takes effect when you request the generation of chimeras with
-the <chimera_perc> option. Also, this option is quite memory intensive, so you
-should probably limit yourself to a relatively small number of reference sequences
-if you want to use it. Default: 10 bp
-
-=item -af <abundance_file> | -abundance_file <abundance_file>
-
-Specify the relative abundance of the reference sequences manually in an input
-file. Each line of the file should contain a sequence name and its relative
-abundance (%), e.g. 'seqABC 82.1' or 'seqABC 82.1 10.2' if you are specifying two
-different libraries.
-
-=item -am <abundance_model>... | -abundance_model <abundance_model>...
-
-Relative abundance model for the input reference sequences: uniform, linear, powerlaw,
-logarithmic or exponential. The uniform and linear models do not require a
-parameter, but the other models take a parameter in the range [0, infinity). If
-this parameter is not specified, then it is randomly chosen. Examples:
-
-  uniform distribution: uniform
-  powerlaw distribution with parameter 0.1: powerlaw 0.1
-  exponential distribution with automatically chosen parameter: exponential
-
-Default: uniform 1
-
-=item -nl <num_libraries> | -num_libraries <num_libraries>
-
-Number of independent libraries to create. Specify how diverse and similar they
-should be with <diversity>, <shared_perc> and <permuted_perc>. Assign them
-different MID tags with <multiplex_ids>. Default: 1
-
-=item -mi <multiplex_ids> | -multiplex_ids <multiplex_ids>
-
-Specify an optional FASTA file that contains multiplex sequence identifiers
-(a.k.a MIDs or barcodes) to add to the sequences (one sequence per library). The MIDs
-are included in the length specified with the -read_dist option.
-
-=item -di <diversity>... | -diversity <diversity>...
-
-Richness, or number of reference sequences to include in the shotgun libraries.
-Use 0 for the maximum diversity possible (based on the number of reference sequences
-available). Provide one value to make all libraries have the same diversity, or
-one diversity value per library otherwise. Default: 0
-
-=item -sp <shared_perc> | -shared_perc <shared_perc>
-
-For multiple libraries, percent of reference sequences they should have in common
-(relative to the diversity of the least diverse library). Default: 0 %
-
-=item -pp <permuted_perc> | -permuted_perc <permuted_perc>
-
-For multiple libraries, percent of the most-abundant reference sequences to permute
-in rank-abundance. Default: 0 %
-
-=item -rs <random_seed> | -random_seed <random_seed>
-
-Seed number to use for the pseudo-random number generator.
-
-=item -dt <desc_track> | -desc_track <desc_track>
-
-Track read information (reference sequence, position, errors, ...) by writing
-it in the read description. Default: 1
-
-=item -ql <qual_levels>... | -qual_levels <qual_levels>...
-
-Generate basic quality scores for the simulated reads. Good residues are given a
-specified good score (e.g. 30) and residues that are the result of an insertion
-or substitution are given a specified bad score (e.g. 10). Specify first the
-good score and then the bad score on the command-line, e.g.: 30 10. Default:
-
-
-=item -fq <fastq_output> | -fastq_output <fastq_output>
-
-Write the generated reads in FASTQ format (Sanger variant) instead of FASTA and
-QUAL. <qual_levels> need to be specified for this option to be effective. 
-Default: 0
-
-=item -bn <base_name> | -base_name <base_name>
-
-Prefix of the output files. Default: grinder
-
-=item -od <output_dir> | -output_dir <output_dir>
-
-Directory where the results should be written. This folder will be created if
-needed. Default: .
-
-=item -pf <profile_file> | -profile_file <profile_file>
-
-A file that contains Grinder arguments. This is useful if you use many options
-or often use the same options. Lines with comments (#) are ignored. Consider the
-profile file, 'simple_profile.txt':
-
-  # A simple Grinder profile
-  -read_dist 105 normal 12
-  -total_reads 1000
-
-Running: grinder -reference_file viral_genomes.fa -profile_file simple_profile.txt
-
-Translates into: grinder -reference_file viral_genomes.fa -read_dist 105 normal 12 -total_reads 1000
-
-Note that the arguments specified in the profile should not be specified again on the command line.
-
-=back
-
-
-
-=head1 CLI OUTPUT
-
-For each shotgun or amplicon read library requested, the following files are
-generated:
-
-=over
-
-=item *
-
-A rank-abundance file, tab-delimited, that shows the relative abundance of the
-different reference sequences
-
-=item *
-
-A file containing the read sequences in FASTA format. The read headers
-contain information necessary to track from which reference sequence each read
-was taken and what errors it contains. This file is not generated if <fastq_output>
-option was provided.
-
-=item *
-
-If the <qual_levels> option was specified, a file containing the quality scores
-of the reads (in QUAL format).
-
-=item *
-
-If the <fastq_output> option was provided, a file containing the read sequences
-in FASTQ format.
-
-=back
-
-=head1 API EXAMPLES
-
-The Grinder API allows you to conveniently use Grinder within Perl scripts. Here
-is a synopsis:
-
-  use Grinder;
-
-  # Set up a new factory (see the OPTIONS section for a complete list of parameters)
-  my $factory = Grinder->new( -reference_file => 'genomes.fna' );
-
-  # Process all shotgun libraries requested
-  while ( my $struct = $factory->next_lib ) {
-
-    # The ID and abundance of the 3rd most abundant genome in this community
-    my $id = $struct->{ids}->[2];
-    my $ab = $struct->{abs}->[2];
-
-    # Create shotgun reads
-    while ( my $read = $factory->next_read) {
-
-      # The read is a Bioperl sequence object with these properties:
-      my $read_id     = $read->id;     # read ID given by Grinder
-      my $read_seq    = $read->seq;    # nucleotide sequence
-      my $read_mid    = $read->mid;    # MID or tag attached to the read
-      my $read_errors = $read->errors; # errors that the read contains
- 
-      # Where was the read taken from? The reference sequence refers to the
-      # database sequence for shotgun libraries, amplicon obtained from the
-      # database sequence, or could even be a chimeric sequence
-      my $ref_id     = $read->reference->id; # ID of the reference sequence
-      my $ref_start  = $read->start;         # start of the read on the reference
-      my $ref_end    = $read->end;           # end of the read on the reference
-      my $ref_strand = $read->strand;        # strand of the reference
-      
-    }
-  }
-
-  # Similarly, for shotgun mate pairs
-  my $factory = Grinder->new( -reference_file => 'genomes.fna',
-                              -insert_dist    => 250            );
-  while ( $factory->next_lib ) {
-    while ( my $read = $factory->next_read ) {
-      # The first read is the first mate of the mate pair
-      # The second read is the second mate of the mate pair
-      # The third read is the first mate of the next mate pair
-      # ...
-    }
-  }
-
-  # To generate an amplicon library
-  my $factory = Grinder->new( -reference_file  => 'genomes.fna',
-                              -forward_reverse => '16Sgenes.fna',
-                              -length_bias     => 0,
-                              -unidirectional  => 1              );
-  while ( $factory->next_lib ) {
-    while ( my $read = $factory->next_read) {
-      # ...
-    }
-  }
-
-=head1 API METHODS
-
-The rest of the documentation details the available Grinder API methods.
-
-=head2 new
-
-Title   : new
-
-Function: Create a new Grinder factory initialized with the passed arguments.
-          Available parameters described in the OPTIONS section.
-
-Usage   : my $factory = Grinder->new( -reference_file => 'genomes.fna' );
-
-Returns : a new Grinder object
-
-=head2 next_lib
-
-Title   : next_lib
-
-Function: Go to the next shotgun library to process.
-
-Usage   : my $struct = $factory->next_lib;
-
-Returns : Community structure to be used for this library, where $struct->{ids}
-          is an array reference containing the IDs of the genomes making up the
-          community (sorted by decreasing relative abundance) and $struct->{abs}
-          is an array reference of the genome abundances (in the same order as
-          the IDs).
-
-=head2 next_read
-
-Title   : next_read
-
-Function: Create an amplicon or shotgun read for the current library.
-
-Usage   : my $read  = $factory->next_read; # for single read
-          my $mate1 = $factory->next_read; # for mate pairs
-          my $mate2 = $factory->next_read;
-
-Returns : A sequence represented as a Bio::Seq::SimulatedRead object
-
-=head2 get_random_seed
-
-Title   : get_random_seed
-
-Function: Return the number used to seed the pseudo-random number generator
-
-Usage   : my $seed = $factory->get_random_seed;
-
-Returns : seed number
-
-
-=head1 COPYRIGHT
-
-Copyright 2009-2012 Florent ANGLY <florent.angly at gmail.com>
-
-Grinder is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License (GPL) as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-Grinder is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with Grinder.  If not, see <http://www.gnu.org/licenses/>.
-
-=head1 BUGS
-
-All complex software has bugs lurking in it, and this program is no exception.
-If you find a bug, please report it on the SourceForge Tracker for Grinder:
-L<http://sourceforge.net/tracker/?group_id=244196&atid=1124737>
-
-Bug reports, suggestions and patches are welcome. Grinder's code is developed
-on Sourceforge (L<http://sourceforge.net/scm/?type=git&group_id=244196>) and is
-under Git revision control. To get started with a patch, do:
-
-   git clone git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-
-
diff --git a/t/01-shotgun.t b/t/01-shotgun.t
index 2ccdb84..a14389b 100644
--- a/t/01-shotgun.t
+++ b/t/01-shotgun.t
@@ -13,7 +13,7 @@ my ($factory, $nof_reads, $read);
 # Initialization with short argument
 
 ok $factory = Grinder->new(
-   -rf => data('shotgun_database_extended.fa'),
+   -rf => data('shotgun_database.fa'),
    -tr => 10                         ,
 ), 'Shotgun & short arguments';
 
@@ -22,9 +22,9 @@ ok $factory->next_read;
 # Long argument
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -read_dist      => 48                                  ,
-   -total_reads    => 100                                 ,
+   -reference_file => data('shotgun_database.fa'),
+   -read_dist      => 48                         ,
+   -total_reads    => 100                        ,
 ), 'Long arguments';
 
 ok $factory->next_lib;
diff --git a/t/02-mates.t b/t/02-mates.t
index 5f0c457..e725d5b 100644
--- a/t/02-mates.t
+++ b/t/02-mates.t
@@ -11,10 +11,10 @@ use Grinder;
 my ($factory, $read, $nof_reads);
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -total_reads    => 100                                 ,
-   -read_dist      => 48                                  ,
-   -insert_dist    => 250                                 ,
+   -reference_file => data('shotgun_database.fa'),
+   -total_reads    => 100                        ,
+   -read_dist      => 48                         ,
+   -insert_dist    => 250                        ,
 ), 'Mate pairs';
 
 ok $factory->next_lib;
diff --git a/t/04-abundances.t b/t/04-abundances.t
index 02e0069..d39cbc1 100644
--- a/t/04-abundances.t
+++ b/t/04-abundances.t
@@ -14,11 +14,11 @@ my ($factory, $nof_reads, $read, %sources);
 # Specified genome abundance for a single shotgun library
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -abundance_file => data('abundances.txt')              ,
-   -length_bias    => 0                                   ,
-   -random_seed    => 1910567890                          ,
-   -total_reads    => 1000                                ,
+   -reference_file => data('shotgun_database.fa'),
+   -abundance_file => data('abundances.txt')     ,
+   -length_bias    => 0                          ,
+   -random_seed    => 1910567890                 ,
+   -total_reads    => 1000                       ,
 ), 'Genome abundance for a single shotgun libraries';
 
 while ( $read = $factory->next_read ) {
@@ -91,11 +91,11 @@ is $factory->next_lib, undef;
 # Specified genome abundance for multiple shotgun libraries
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -abundance_file => data('abundances_multiple.txt')     ,
-   -length_bias    => 0                                   ,
-   -random_seed    => 1232567890                          ,
-   -total_reads    => 1000                                ,
+   -reference_file => data('shotgun_database.fa')    ,
+   -abundance_file => data('abundances_multiple.txt'),
+   -length_bias    => 0                              ,
+   -random_seed    => 1232567890                     ,
+   -total_reads    => 1000                           ,
 ), 'Genome abundance for multiple shotgun libraries';
 
 ok $factory->next_lib;
diff --git a/t/06-seed.t b/t/06-seed.t
index fa62bf0..0939687 100644
--- a/t/06-seed.t
+++ b/t/06-seed.t
@@ -14,9 +14,9 @@ my ($factory, $seed1, $seed2, $seed3, @dataset1, @dataset2);
 # Seed the pseudo-random number generator
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -random_seed    => 1233567890                          ,
-   -total_reads    => 10                                  ,
+   -reference_file => data('shotgun_database.fa'),
+   -random_seed    => 1233567890                 ,
+   -total_reads    => 10                         ,
 ), 'Set the seed';
 ok $seed1 = $factory->get_random_seed();
 is $seed1, 1233567890;
diff --git a/t/10-quality.t b/t/10-quality.t
index 195f6bf..2fafcc9 100644
--- a/t/10-quality.t
+++ b/t/10-quality.t
@@ -14,9 +14,9 @@ my ($factory, $nof_reads, $read);
 # Outputing basic quality scores
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -read_dist      => 52                                  ,
-   -total_reads    => 10                                  ,
+   -reference_file => data('shotgun_database.fa'),
+   -read_dist      => 52                         ,
+   -total_reads    => 10                         ,
 ), 'No quality scores';
 
 ok $read = $factory->next_read;
@@ -24,10 +24,10 @@ is_deeply $read->qual, [];
 
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -read_dist      => 52                                  ,
-   -total_reads    => 10                                  ,
-   -qual_levels    => '30 10'                             ,
+   -reference_file => data('shotgun_database.fa'),
+   -read_dist      => 52                         ,
+   -total_reads    => 10                         ,
+   -qual_levels    => '30 10'                    ,
 ), 'With quality scores';
 
 ok $read = $factory->next_read;
diff --git a/t/11-tracking.t b/t/11-tracking.t
index 4aa0388..56859a3 100644
--- a/t/11-tracking.t
+++ b/t/11-tracking.t
@@ -14,11 +14,11 @@ my ($factory, $nof_reads, $read);
 # Tracking read information in the read description 
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -total_reads    => 10                                  ,
-   -unidirectional => 0                                   ,
-   -desc_track     => 1                                   ,
-), 'Bidirectional shotgun tracking';
+   -reference_file => data('shotgun_database.fa'),
+   -total_reads    => 10                         ,
+   -unidirectional => 0                          ,
+   -desc_track     => 1                          ,
+), 'Shotgun tracking';
 
 ok $read = $factory->next_read;
 while ($factory->next_read) {
@@ -27,11 +27,11 @@ while ($factory->next_read) {
 
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -total_reads    => 10                                  ,
-   -unidirectional => 1                                   ,
-   -desc_track     => 1                                   ,
-), 'Forward shotgun tracking';
+   -reference_file => data('shotgun_database.fa'),
+   -total_reads    => 10                         ,
+   -unidirectional => 1                          ,
+   -desc_track     => 1                          ,
+), 'Shotgun tracking';
 
 ok $read = $factory->next_read;
 while ($factory->next_read) {
@@ -40,11 +40,11 @@ while ($factory->next_read) {
 
 
 ok $factory = Grinder->new(
-   -reference_file => data('shotgun_database_extended.fa'),
-   -total_reads    => 10                                  ,
-   -unidirectional => -1                                  ,
-   -desc_track     => 1                                   ,
-), 'Reverse shotgun tracking';
+   -reference_file => data('shotgun_database.fa'),
+   -total_reads    => 10                         ,
+   -unidirectional => -1                         ,
+   -desc_track     => 1                          ,
+), 'Shotgun tracking';
 
 ok $read = $factory->next_read;
 while ($factory->next_read) {
diff --git a/t/18-amplicon-multiple.t b/t/18-amplicon-multiple.t
index 536ceef..68cc511 100644
--- a/t/18-amplicon-multiple.t
+++ b/t/18-amplicon-multiple.t
@@ -24,23 +24,10 @@ ok $factory = Grinder->new(
 $nof_reads = 0;
 while ( $read = $factory->next_read ) {
    $nof_reads++;
-   $got_amplicons{$read->seq} = undef;
    ok_read_forward_only($read, 1, $nof_reads);
 };
 is $nof_reads, 100;
 
-%expected_amplicons = (
-   'AAACTTAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGTccccc' => undef,
-   'AAACTTAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaggggggggaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGTggggg' => undef,
-   'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGTggggg' => undef,
-   'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGT'      => undef,
-   'AAACTUAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGTccccc' => undef,
-);
-
-is_deeply( \%got_amplicons, \%expected_amplicons );
-undef %got_amplicons;
-
-
 # Template with several matching amplicons and forward and reverse primers
 
 ok $factory = Grinder->new(
@@ -59,12 +46,9 @@ while ( $read = $factory->next_read ) {
    ok_read_forward_reverse($read, 1, $nof_reads);
 };
 is $nof_reads, 100;
-
 %expected_amplicons = (
-   'AAACTTAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGT' => undef,
-   'AAACTTAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaggggggggaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGT' => undef,
-   'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGT' => undef,
    'AAACTUAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGT' => undef,
+   'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGT' => undef,
 );
 is_deeply( \%got_amplicons, \%expected_amplicons );
 undef %got_amplicons;
@@ -88,17 +72,9 @@ while ( $read = $factory->next_read ) {
    ok_read_forward_reverse($read, 1, $nof_reads);
 };
 is $nof_reads, 100;
-
-%expected_amplicons = (
-   'AAACTUAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGT' => undef,
-   'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGT' => undef,
-   'AAACTTAAAGGAATTGACGGggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggGTACACACCGCCCGT' => undef,
-);
-
 is_deeply( \%got_amplicons, \%expected_amplicons );
 undef %got_amplicons;
 
-
 done_testing();
 
 
@@ -115,6 +91,8 @@ sub ok_read_forward_reverse {
       is $strand, $req_strand;
    }
    my $readseq = $read->seq;
+   ok (($readseq eq 'AAACTUAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGT')
+     or ($readseq eq 'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGT'));
    is $read->id, $nof_reads;
    is $read->length, 95;
 }
@@ -130,6 +108,9 @@ sub ok_read_forward_only {
       is $strand, $req_strand;
    }
    my $readseq = $read->seq;
+   ok ( ($readseq eq 'AAACTUAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaGTACACACCGCCCGTccccc' )
+     or ($readseq eq 'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGTggggg')
+     or ($readseq eq 'AAACTTAAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGT'     ));
    is $read->id, $nof_reads;
    my $readlength = $read->length;
    ok ( ($readlength == 95) or ($readlength == 100) );
diff --git a/t/20-community-structure.t b/t/20-community-structure.t
index 1e71428..d3b5216 100644
--- a/t/20-community-structure.t
+++ b/t/20-community-structure.t
@@ -11,27 +11,24 @@ use Grinder;
 my ($factory, $nof_reads, $read, @reads, $ra, $era, $coeff, $min, $max, $mean,
     $stddev, $struct, $param1, $param2);
 
-my $nof_refs = 6;
-my $max_refs = 10;
-
 
 # Uniform community structure
 
 ok $factory = Grinder->new(
-   -reference_file  => data('shotgun_database_extended.fa'),
-   -read_dist       => 48                                  ,
-   -length_bias     => 0                                   ,
-   -abundance_model => ('uniform', 0)                      ,
-   -total_reads     => 1000                                ,
+   -reference_file  => data('shotgun_database.fa'),
+   -read_dist       => 48                         ,
+   -length_bias     => 0                          ,
+   -abundance_model => ('uniform', 0)             ,
+   -total_reads     => 1000                       ,
 ), 'Uniform community structure';
 
 while ( $read = $factory->next_read ) {
    push @reads, $read->reference->id;
 }
 
-$ra = rank_abundance(\@reads, $max_refs);
+$ra = rank_abundance(\@reads, 10);
 ($min, $max, $mean, $stddev) = stats($ra);
-$era = uniform_cstruct($max_refs, $nof_refs, 1000);
+$era = uniform_cstruct(10, 5, 1000);
 $coeff = corr_coeff($ra, $era, $mean);
 cmp_ok $coeff, '>', 0.97;
 
@@ -41,20 +38,20 @@ cmp_ok $coeff, '>', 0.97;
 # Linear community structure
 
 ok $factory = Grinder->new(
-   -reference_file  => data('shotgun_database_extended.fa'),
-   -read_dist       => 48                                  ,
-   -length_bias     => 0                                   ,
-   -abundance_model => ('linear', 0)                       ,
-   -total_reads     => 1000                                ,
+   -reference_file  => data('shotgun_database.fa'),
+   -read_dist       => 48                         ,
+   -length_bias     => 0                          ,
+   -abundance_model => ('linear', 0)              ,
+   -total_reads     => 1000                       ,
 ), 'Linear community structure';
 
 while ( $read = $factory->next_read ) {
    push @reads, $read->reference->id;
 }
 
-$ra = rank_abundance(\@reads, $max_refs);
+$ra = rank_abundance(\@reads, 10);
 ($min, $max, $mean, $stddev) = stats($ra);
-$era = linear_cstruct($max_refs, $nof_refs, 1000);
+$era = linear_cstruct(10, 5, 1000);
 $coeff = corr_coeff($ra, $era, $mean);
 cmp_ok $coeff, '>', 0.97;
 
@@ -64,20 +61,20 @@ cmp_ok $coeff, '>', 0.97;
 # Power law community structure
 
 ok $factory = Grinder->new(
-   -reference_file  => data('shotgun_database_extended.fa'),
-   -read_dist       => 48                                  ,
-   -length_bias     => 0                                   ,
-   -abundance_model => ('powerlaw', 0.5)                   ,
-   -total_reads     => 1000                                ,
+   -reference_file  => data('shotgun_database.fa'),
+   -read_dist       => 48                         ,
+   -length_bias     => 0                          ,
+   -abundance_model => ('powerlaw', 0.5)          ,
+   -total_reads     => 1000                       ,
 ), 'Power law community structure';
 
 while ( $read = $factory->next_read ) {
    push @reads, $read->reference->id;
 }
 
-$ra = rank_abundance(\@reads, $max_refs);
+$ra = rank_abundance(\@reads, 10);
 ($min, $max, $mean, $stddev) = stats($ra);
-$era = powerlaw_cstruct($max_refs, $nof_refs, 0.5, 1000);
+$era = powerlaw_cstruct(10, 5, 0.5, 1000);
 $coeff = corr_coeff($ra, $era, $mean);
 cmp_ok $coeff, '>', 0.97;
 
@@ -87,20 +84,20 @@ cmp_ok $coeff, '>', 0.97;
 # Logarithmic community structure
 
 ok $factory = Grinder->new(
-   -reference_file  => data('shotgun_database_extended.fa'),
-   -read_dist       => 48                                  ,
-   -length_bias     => 0                                   ,
-   -abundance_model => ('logarithmic', 0.5)                ,
-   -total_reads     => 1000                                ,
+   -reference_file  => data('shotgun_database.fa'),
+   -read_dist       => 48                         ,
+   -length_bias     => 0                          ,
+   -abundance_model => ('logarithmic', 0.5)       ,
+   -total_reads     => 1000                       ,
 ), 'Logarithmic community structure';
 
 while ( $read = $factory->next_read ) {
    push @reads, $read->reference->id;
 }
 
-$ra = rank_abundance(\@reads, $max_refs);
+$ra = rank_abundance(\@reads, 10);
 ($min, $max, $mean, $stddev) = stats($ra);
-$era = logarithmic_cstruct($max_refs, $nof_refs, 0.5, 1000);
+$era = logarithmic_cstruct(10, 5, 0.5, 1000);
 $coeff = corr_coeff($ra, $era, $mean);
 cmp_ok $coeff, '>', 0.97;
 
@@ -110,11 +107,11 @@ cmp_ok $coeff, '>', 0.97;
 # Exponential community structure
 
 ok $factory = Grinder->new(
-   -reference_file  => data('shotgun_database_extended.fa'),
-   -read_dist       => 48                                  ,
-   -length_bias     => 0                                   ,
-   -abundance_model => ('exponential', 0.5)                ,
-   -total_reads     => 1000                                ,
+   -reference_file  => data('shotgun_database.fa'),
+   -read_dist       => 48                         ,
+   -length_bias     => 0                          ,
+   -abundance_model => ('exponential', 0.5)       ,
+   -total_reads     => 1000                       ,
 ), 'Exponential community structure';
 
 $struct = $factory->next_lib;
@@ -122,9 +119,9 @@ while ( $read = $factory->next_read ) {
    push @reads, $read->reference->id;
 }
 
-$ra = rank_abundance(\@reads, $max_refs);
+$ra = rank_abundance(\@reads, 10);
 ($min, $max, $mean, $stddev) = stats($ra);
-$era = exponential_cstruct($max_refs, $nof_refs, 0.5, 1000);
+$era = exponential_cstruct(10, 5, 0.5, 1000);
 $coeff = corr_coeff($ra, $era, $mean);
 cmp_ok $coeff, '>', 0.97;
 is $struct->{param}, 0.5;
@@ -134,13 +131,13 @@ is $struct->{param}, 0.5;
 # Communities with random structure parameter value
 
 ok $factory = Grinder->new(
-   -reference_file  => data('shotgun_database_extended.fa'),
-   -read_dist       => 48                                  ,
-   -length_bias     => 0                                   ,
-   -num_libraries   => 2                                   ,
-   -shared_perc     => 100                                 ,
-   -abundance_model => ('exponential')                     ,
-   -total_reads     => 1000                                ,
+   -reference_file  => data('shotgun_database.fa'),
+   -read_dist       => 48                         ,
+   -length_bias     => 0                          ,
+   -num_libraries   => 2                          ,
+   -shared_perc     => 100                        ,
+   -abundance_model => ('exponential')            ,
+   -total_reads     => 1000                       ,
 ), 'Communities with random structure parameter value';
 
 $struct = $factory->next_lib;
@@ -148,11 +145,11 @@ while ( $read = $factory->next_read ) {
    push @reads, $read->reference->id;
 }
 
-$ra = rank_abundance(\@reads, $max_refs);
+$ra = rank_abundance(\@reads, 10);
 ($min, $max, $mean, $stddev) = stats($ra);
 $param1 = $struct->{param};
 between_ok( $param1, 0, 1000 );
-$era = exponential_cstruct($max_refs, $nof_refs, $param1, 1000);
+$era = exponential_cstruct(10, 5, $param1, 1000);
 $coeff = corr_coeff($ra, $era, $mean);
 cmp_ok $coeff, '>', 0.97;
 
@@ -163,11 +160,11 @@ while ( $read = $factory->next_read ) {
    push @reads, $read->reference->id;
 }
 
-$ra = rank_abundance(\@reads, $max_refs);
+$ra = rank_abundance(\@reads, 10);
 ($min, $max, $mean, $stddev) = stats($ra);
 $param2 = $struct->{param};
 between_ok( $param2, 0, 1000 );
-$era = exponential_cstruct($max_refs, $nof_refs, $param2, 1000);
+$era = exponential_cstruct(10, 5, $param2, 1000);
 $coeff = corr_coeff($ra, $era, $mean);
 cmp_ok $coeff, '>', 0.97;
 
@@ -175,7 +172,6 @@ isnt $param1, $param2;
 
 @reads = ();
 
-
 done_testing();
 
 
diff --git a/t/26-combined-errors.t b/t/26-combined-errors.t
index d06a14f..eb966b5 100644
--- a/t/26-combined-errors.t
+++ b/t/26-combined-errors.t
@@ -14,17 +14,17 @@ my ($factory, $nof_reads, $read);
 # Combined errors: indels, substitutions, homopolymers, chimeras
 
 ok $factory = Grinder->new(
-   -reference_file   => data('shotgun_database_extended.fa'),
-   -unidirectional   => 1                                   ,
-   -read_dist        => 48                                  ,
-   -total_reads      => 1000                                ,
-   -homopolymer_dist => 'balzer'                            ,
-   -mutation_ratio   => (100, 0)                            ,
-   -mutation_dist    => ('uniform', 10)                     ,
-   -chimera_perc     => 10                                  ,
-   -chimera_dist     => (100)                               ,
-   -chimera_kmer     => 0                                   ,
-), 'Combined errors (uniform)';
+   -reference_file   => data('shotgun_database.fa'),
+   -unidirectional   => 1                          ,
+   -read_dist        => 48                         ,
+   -total_reads      => 1000                       ,
+   -homopolymer_dist => 'balzer'                   ,
+   -mutation_ratio   => (100, 0)                   ,
+   -mutation_dist    => ('uniform', 10)            ,
+   -chimera_perc     => 10                         ,
+   -chimera_dist     => (100)                      ,
+   -chimera_kmer     => 0                          ,
+), 'Combined errors';
 
 $nof_reads = 0;
 while ( $read = $factory->next_read ) {
@@ -34,29 +34,4 @@ while ( $read = $factory->next_read ) {
 };
 is $nof_reads, 1000;
 
-
-# Combined errors with linear model
-
-ok $factory = Grinder->new(
-   -reference_file   => data('shotgun_database_extended.fa'),
-   -unidirectional   => 1                                   ,
-   -read_dist        => (20, 'normal', 10)                  ,
-   -total_reads      => 1000                                ,
-   -homopolymer_dist => 'balzer'                            ,
-   -mutation_ratio   => (85, 15)                            ,
-   -mutation_dist    => ('linear', 2, 2)                    ,
-   -chimera_perc     => 10                                  ,
-   -chimera_dist     => (100)                               ,
-   -chimera_kmer     => 0                                   ,
-), 'Combined errors (linear)';
-
-$nof_reads = 0;
-while ( $read = $factory->next_read ) {
-   $nof_reads++;
-   isa_ok $read, 'Bio::Seq::SimulatedRead';
-   is $read->id, $nof_reads;
-};
-is $nof_reads, 1000;
-
-
 done_testing();
diff --git a/t/data/multiple_amplicon_database.fa b/t/data/multiple_amplicon_database.fa
index f0087eb..61caadf 100644
--- a/t/data/multiple_amplicon_database.fa
+++ b/t/data/multiple_amplicon_database.fa
@@ -17,8 +17,5 @@ ttttttttttttttttttttttttttttttttttttttGTACACACCGCCCGT
 >seq3 nof_amplicons=1
 AAACTUAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 GTACACACCGCCCGTccccccccccccccccccccccccccccccccccccccccccccccccccc
->seq4 nof_amplicons=2 one on each strand
-AAACTTAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-GTACACACCGCCCGTccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
-ACGGGCGGTGTGTACtttttttttttttttttttttttttttttcccccccctttttttttttttttttttttttCCGTC
-AATTCCTTTAAGTTTccccccc
+
+
diff --git a/t/data/nested_amplicon_database.fa b/t/data/nested_amplicon_database.fa
index 5acaa8f..01927d5 100644
--- a/t/data/nested_amplicon_database.fa
+++ b/t/data/nested_amplicon_database.fa
@@ -3,8 +3,3 @@ cccccAAACTUAAAGGAATTGACGGaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 aaaaaGTACACACCGCCCGTcccccAAACTUAAAGGAATTGACGGcccccAAACTUAAAGGAATTGACGGccccAAACTT
 AAAGGAATTGRCGGttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttGTACAC
 ACCGCCCGTccccGTACACACCGCCCGTcc
-
->seq2 template FRFR: a short match on reverse strand and a long match on forward
-AAACTTAAAGGAATTGACGGaaaaaaaaaACGGGCGGTGTGTACcccccccccccccccccccccccccccccccccccc
-ccccccccccccccccccccccccCCGTCAATTCCTTTAAGTTTaaaaaaaaaGTACACACCGCCCGT
-
diff --git a/t/data/shotgun_database.fa b/t/data/shotgun_database.fa
index e37b7df..5ff6d12 100644
--- a/t/data/shotgun_database.fa
+++ b/t/data/shotgun_database.fa
@@ -14,6 +14,3 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
 tttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt
 >seq5 last sequence, last comment
 aaaaaaaaaattttttttttttttttttttttttttttttttttttttttttttttttttttttttttttgggggggggg
->seq6 0 bp sequence
-
-
diff --git a/t/data/shotgun_database_extended.fa b/t/data/shotgun_database_extended.fa
deleted file mode 100644
index 0619dc2..0000000
--- a/t/data/shotgun_database_extended.fa
+++ /dev/null
@@ -1,20 +0,0 @@
->seq1 this is the first sequence
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
->seq2
-cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
-cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
-cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
->seq3
-gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
-gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
->seq4
-tttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttt
->seq5 last sequence, last comment
-aaaaaaaaaattttttttttttttttttttttttttttttttttttttttttttttttttttttttttttgggggggggg
->seq6 0 bp sequence
-
->seq7 1 bp sequence
-a
diff --git a/utils/average_genome_size b/utils/average_genome_size
index 304aa83..50c6f32 100755
--- a/utils/average_genome_size
+++ b/utils/average_genome_size
@@ -1,72 +1,15 @@
 #! /usr/bin/env perl
 
-# This file is part of the Grinder package, copyright 2009-2012
+# This file is part of the Grinder package, copyright 2009,2010,2011,2012
 # Florent Angly <florent.angly at gmail.com>, under the GPLv3 license
 
-
-=head1 NAME
-
-average_genome_size - Calculate the average genome size (in bp) of species in a Grinder library
-
-=head1 DESCRIPTION
-
-Calculate the average genome size (in bp) of species in a Grinder library given
-the library composition and the full-genomes used to produce it.
-
-=head1 REQUIRED ARGUMENTS
-
-=over
-
-=item <db_fasta>
-
-FASTA file containing the full-genomes used to produce the Grinder library.
-
-=for Euclid:
-   db_fasta.type: readable
-
-=item <rank_file>
-
-Grinder rank file that describes the library composition.
-
-=for Euclid:
-   rank_file.type: readable
-
-=back
-
-=head1 COPYRIGHT
-
-Copyright 2009-2012 Florent ANGLY <florent.angly at gmail.com>
-
-Grinder is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License (GPL) as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-Grinder is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with Grinder.  If not, see <http://www.gnu.org/licenses/>.
-
-=head1 BUGS
-
-All complex software has bugs lurking in it, and this program is no exception.
-If you find a bug, please report it on the SourceForge Tracker for Grinder:
-L<http://sourceforge.net/tracker/?group_id=244196&atid=1124737>
-
-Bug reports, suggestions and patches are welcome. Grinder's code is developed
-on Sourceforge (L<http://sourceforge.net/scm/?type=git&group_id=244196>) and is
-under Git revision control. To get started with a patch, do:
-
-   git clone git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-
-=cut
-
-
 use strict;
 use warnings;
-use Getopt::Euclid qw( :minimal_keys );
-average_genome_size($ARGV{'db_fasta'}, $ARGV{'rank_file'});
+my $usage = "Usage: $0 FASTA_DATABASE GRINDER_RANK_FILE\n".
+  "$0 calculates the average genome size (in bp) of a simulated random library produces by Grinder\n";
+my $db_fasta  = $ARGV[0] || die $usage;
+my $rank_file = $ARGV[1] || die $usage;
+average_genome_size($db_fasta, $rank_file);
 exit;
 
 
diff --git a/utils/change_paired_read_orientation b/utils/change_paired_read_orientation
index 5871dd7..1cd6241 100755
--- a/utils/change_paired_read_orientation
+++ b/utils/change_paired_read_orientation
@@ -1,74 +1,17 @@
 #! /usr/bin/env perl
 
-# This file is part of the Grinder package, copyright 2009-2012
+# This file is part of the Grinder package, copyright 2009,2010,2011,2012
 # Florent Angly <florent.angly at gmail.com>, under the GPLv3 license
 
-
-=head1 NAME
-
-change_paired_read_orientation - Change the orientation of paired-end reads in a
-FASTA file
-
-=head1 DESCRIPTION
-
-Reverse the orientation, i.e. reverse-complement each right-hand paired-end read
-(ID ending in /2) in a FASTA file.
-
-=head1 REQUIRED ARGUMENTS
-
-=over
-
-=item <in_fasta>
-
-FASTA file containing the reads to re-orient.
-
-=for Euclid:
-   in_fasta.type: readable
-
-=item <out_fasta>
-
-Output FASTA file where to write the reads.
-
-=for Euclid:
-   out_fasta.type: writable
-
-=back
-
-=head1 COPYRIGHT
-
-Copyright 2009-2012 Florent ANGLY <florent.angly at gmail.com>
-
-Grinder is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License (GPL) as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-Grinder is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with Grinder.  If not, see <http://www.gnu.org/licenses/>.
-
-=head1 BUGS
-
-All complex software has bugs lurking in it, and this program is no exception.
-If you find a bug, please report it on the SourceForge Tracker for Grinder:
-L<http://sourceforge.net/tracker/?group_id=244196&atid=1124737>
-
-Bug reports, suggestions and patches are welcome. Grinder's code is developed
-on Sourceforge (L<http://sourceforge.net/scm/?type=git&group_id=244196>) and is
-under Git revision control. To get started with a patch, do:
-
-   git clone git://biogrinder.git.sourceforge.net/gitroot/biogrinder/biogrinder
-
-=cut
-
-
 use strict;
 use warnings;
-use Getopt::Euclid qw( :minimal_keys );
 use Bio::SeqIO;
-change_paired_read_orientation($ARGV{'in_fasta'}, $ARGV{'out_fasta'});
+
+my $usage = "Usage: $0 INPUT_FASTA_FILE OUTPUT_FASTA_FILE\n".
+  "$0 reverses the orientation of each right-hand paired-end read (ID ending in /2) in a FASTA file\n";
+my $in_fasta  = $ARGV[0] || die $usage;
+my $out_fasta = $ARGV[1] || die $usage;
+change_paired_read_orientation($in_fasta, $out_fasta);
 exit;
 
 

-- 
Versatile shotgun and amplicon read simulator



More information about the debian-med-commit mailing list