[med-svn] [tophat] 01/04: Imported Upstream version 2.0.10

Alex Mestiashvili malex-guest at moszumanska.debian.org
Thu Nov 21 10:21:33 UTC 2013


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository tophat.

commit e58d4d0b18548b92609ff3e3be5dab765474f273
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Thu Nov 21 09:45:23 2013 +0100

    Imported Upstream version 2.0.10
---
 README                      |    2 +
 THANKS                      |   36 ++-
 configure                   |   24 +-
 configure.ac                |    2 +-
 src/common.cpp              |    4 +-
 src/common.h                |    4 +-
 src/long_spanning_reads.cpp |    6 +-
 src/prep_reads.cpp          |  140 ++++++++----
 src/reads.cpp               |  191 +++++++---------
 src/reads.h                 |  135 ++++++++---
 src/sam_juncs.cpp           |    2 -
 src/segment_juncs.cpp       |   38 +---
 src/tophat-fusion-post      |    7 +-
 src/tophat.py               |  114 +++++-----
 src/tophat_reports.cpp      |  528 ++++++++++++++++++++++++-------------------
 15 files changed, 694 insertions(+), 539 deletions(-)

diff --git a/README b/README
index 37d9c05..f7ce76a 100644
--- a/README
+++ b/README
@@ -28,3 +28,5 @@ Samtools files as indicated (the header files and libbam.a)
 TopHat also requires the Boost libraries (http://www.boost.org).
 Please refer to the TopHat webpage for installation information.
 
+Note TopHat uses the SeqAn library that comes with the TopHat2 source code distribution.
+It is not necessary for the SeqAn library to be installed separately in order to run TopHat.
diff --git a/THANKS b/THANKS
index bd93110..a6c7889 100644
--- a/THANKS
+++ b/THANKS
@@ -1,4 +1,28 @@
-TopHat THANKS file
+TopHat and TopHat2 THANKS file
+
+TopHat2 was developed by Daehwan Kim, Geo Pertea, Cole Trapnell, and Steven Salzberg,
+with help of Harold Pimentel and Ryan Kelly. Lior Pachter and Adam Roberts contributed to
+the discussion of TopHat2.  The following list shows our contacts and people
+who contributed to the development of TopHat2 by suggesting improvements and giving feedbacks
+as well as reporting bugs with their test data set.
+
+Arthur Delcher	     	aldelcher at gmail.com
+Liliana Florea	     	florea at jhu.edu
+Brian Haas		bhaas at broadinstitute.org
+Kasper Hansen		kasperdanielhansen at gmail.com
+Ryan Kelley	     	ryankelly at illumina.com
+Daehwan Kim   	 	infphilo at gmail.com
+Ben Langmead		langmea at cs.jhu.edu
+Lior Pachter		lpachter at math.berkeley.edu
+Ela Pertea		mpertea at jhu.edu
+Geo Pertea		gpertea at jhu.edu
+Harold Pimentel		pimentel at cs.berkeley.edu
+Nathalie Pochet		npochet at broadinstitute.org
+Adam Roberts		adarob at gmail.com
+Cole Trapnell		cole at broadinstitute.org
+Steven Salzberg	 	salzberg at jhu.edu
+Alec Wysoker		alecw at broadinstitute.org
+
 
 TopHat was originally written by Cole Trapnell, with substantial creative
 input from Lior Pachter and Steven Salzberg. Many people further contributed
@@ -10,20 +34,20 @@ Robert Bradley		rbradley at math.berkeley.edu
 Steven Brenner		brenner at compbio.berkeley.edu
 Angela Brooks		angelabrooks at berkeley.edu
 Mark Diekhans		markd at soe.ucsc.edu
-Mike Duff			moduff at gmail.com
+Mike Duff		moduff at gmail.com
 Steffen Durinck		sdurinck at illumina.com
 Matthew Fitzgibbon	mfitzgib at fhcrc.org
 Brenton Graveley	graveley at neuron.uchc.edu
-Kasper Hansen		khansen at stat.berkeley.edu
+Kasper Hansen		kasperdanielhansen at gmail.com
 Rachel Harte		hartera at soe.ucsc.edu
-Ben Langmead		langmead at cs.umd.edu
+Ben Langmead		langmea at cs.jhu.edu
 Richard McCombie	mccombie at cshl.edu
 Ali Mortazavi		alim at caltech.edu
 Adam Phillippy		amp at umiacs.umd.edu
-Mike Schatz			mschatz at umiacs.umd.edu
+Mike Schatz		mschatz at cshl.edu
 Gary Schroth		gschroth at illumina.com
 Gavin Sherlock		sherlock at genome.stanford.edu
-Diane Trout			diane at caltech.edu
+Diane Trout		diane at caltech.edu
 Svilen Tzonev		stzonev at illumina.com
 Jeltje Van Baren	jeltje at wustl.edu
 Barbara Wold		wold at caltech.edu
diff --git a/configure b/configure
index 0553e54..932984f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.59 for tophat 2.0.9.
+# Generated by GNU Autoconf 2.59 for tophat 2.0.10.
 #
 # Report bugs to <tophat.cufflinks at gmail.com>.
 #
@@ -269,8 +269,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='tophat'
 PACKAGE_TARNAME='tophat'
-PACKAGE_VERSION='2.0.9'
-PACKAGE_STRING='tophat 2.0.9'
+PACKAGE_VERSION='2.0.10'
+PACKAGE_STRING='tophat 2.0.10'
 PACKAGE_BUGREPORT='tophat.cufflinks at gmail.com'
 
 ac_unique_file="config.h.in"
@@ -792,7 +792,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures tophat 2.0.9 to adapt to many kinds of systems.
+\`configure' configures tophat 2.0.10 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -858,7 +858,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of tophat 2.0.9:";;
+     short | recursive ) echo "Configuration of tophat 2.0.10:";;
    esac
   cat <<\_ACEOF
 
@@ -1009,7 +1009,7 @@ fi
 test -n "$ac_init_help" && exit 0
 if $ac_init_version; then
   cat <<\_ACEOF
-tophat configure 2.0.9
+tophat configure 2.0.10
 generated by GNU Autoconf 2.59
 
 Copyright (C) 2003 Free Software Foundation, Inc.
@@ -1023,7 +1023,7 @@ cat >&5 <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by tophat $as_me 2.0.9, which was
+It was created by tophat $as_me 2.0.10, which was
 generated by GNU Autoconf 2.59.  Invocation command line was
 
   $ $0 $@
@@ -1361,7 +1361,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 cat >>confdefs.h <<\_ACEOF
-#define SVN_REVISION "4112M"
+#define SVN_REVISION "4167"
 _ACEOF
 
 
@@ -1675,7 +1675,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='tophat'
- VERSION='2.0.9'
+ VERSION='2.0.10'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -7134,7 +7134,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='tophat'
- VERSION='2.0.9'
+ VERSION='2.0.10'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -7966,7 +7966,7 @@ _ASBOX
 } >&5
 cat >&5 <<_CSEOF
 
-This file was extended by tophat $as_me 2.0.9, which was
+This file was extended by tophat $as_me 2.0.10, which was
 generated by GNU Autoconf 2.59.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -8029,7 +8029,7 @@ _ACEOF
 
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-tophat config.status 2.0.9
+tophat config.status 2.0.10
 configured by $0, generated by GNU Autoconf 2.59,
   with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
 
diff --git a/configure.ac b/configure.ac
index 3d00c55..7fc65ab 100644
--- a/configure.ac
+++ b/configure.ac
@@ -4,7 +4,7 @@ m4_include([ax_bam.m4])
 #m4_include([ax_check_zlib.m4])
 
 define([svnversion], esyscmd([sh -c "svnversion|tr -d '\n'"]))dnl
-AC_INIT([tophat],[2.0.9],[tophat.cufflinks at gmail.com])
+AC_INIT([tophat],[2.0.10],[tophat.cufflinks at gmail.com])
 AC_DEFINE(SVN_REVISION, "svnversion", [SVN Revision])
 
 AC_CONFIG_SRCDIR([config.h.in])
diff --git a/src/common.cpp b/src/common.cpp
index 64cb587..bc6664e 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -126,7 +126,7 @@ int read_edit_dist = 2;
 int read_realign_edit_dist = 2;
 int max_splice_mismatches = 1;
 
-ReadFormat reads_format = FASTQ;
+//ReadFormat reads_format = FASTQ;
 
 bool verbose = false;
 
@@ -465,12 +465,14 @@ int parse_options(int argc, char** argv, void (*print_usage)())
     switch (next_option) {
     case -1:
       break;
+    /*
     case OPT_FASTA:
       reads_format = FASTA;
       break;
     case OPT_FASTQ:
       reads_format = FASTQ;
       break;
+    */
     case OPT_MIN_ANCHOR:
       min_anchor_len = (uint32_t)parseIntOpt(3, "--min-anchor arg must be at least 3", print_usage);
       break;
diff --git a/src/common.h b/src/common.h
index 40e42d3..49f904e 100644
--- a/src/common.h
+++ b/src/common.h
@@ -90,8 +90,8 @@ extern int read_realign_edit_dist;
 
 extern int max_splice_mismatches;
 
-enum ReadFormat {FASTA, FASTQ};
-extern ReadFormat reads_format;
+enum ReadFormat {FASTX_AUTO=0, FASTA, FASTQ};
+//extern ReadFormat reads_format;
 
 extern bool verbose;
 extern unsigned int max_multihits;
diff --git a/src/long_spanning_reads.cpp b/src/long_spanning_reads.cpp
index d07ff19..598e4a9 100644
--- a/src/long_spanning_reads.cpp
+++ b/src/long_spanning_reads.cpp
@@ -2583,11 +2583,11 @@ bool dfs_seg_hits(RefSequenceTable& rt,
 	      if (success)
 		join_success = true;
 
-	      if (num_try <= 0)
-		return join_success;
-	      
 	      seg_hit_stack.pop_back();
 	      seg_hit_stack.back() = tempHit;
+
+	      if (num_try <= 0)
+		return join_success;
 	    }
 	}
     }
diff --git a/src/prep_reads.cpp b/src/prep_reads.cpp
index 897093e..a989dbf 100644
--- a/src/prep_reads.cpp
+++ b/src/prep_reads.cpp
@@ -44,7 +44,7 @@ void load_readmap(string& flt_fname, vector<bool>& rmap) {
   if (flt_fname.empty()) return;
   ReadStream rdstream(flt_fname, NULL, true);
   Read read;
-  while (rdstream.get_direct(read, reads_format)) {
+  while (rdstream.get_direct(read)) {
     uint32_t rnum=(uint32_t)atol(read.name.c_str());
     if (rnum>=(uint32_t) rmap.size())
          rmap.resize(rnum+1, false);
@@ -209,7 +209,7 @@ void writePrepBam(GBamWriter* wbam, Read& read, uint32_t rid, char trashcode=0,
   wbam->write(bamrec.get_b(), rid);
 }
 
-bool processRead(int matenum, Read& read, uint32_t next_id,  int& num_reads_chucked,
+bool processRead(int matenum, Read& read, ReadFormat rd_format, uint32_t next_id,  int& num_reads_chucked,
 		int& multimap_chucked, GBamWriter* wbam, FILE* fout, FILE* fqindex,
 		int& min_read_len, int& max_read_len, uint64_t& fout_offset, vector<bool>& rmap) {
 	if (read.seq.length()<12) {
@@ -269,7 +269,7 @@ bool processRead(int matenum, Read& read, uint32_t next_id,  int& num_reads_chuc
 	}
 
 	if (wbam) {
-		if (reads_format == FASTA && !quals)
+		if (rd_format == FASTA && !quals)
 			read.qual = string(read.seq.length(), 'I').c_str();
 		else if (color && quals)
 			read.qual = "!" + read.qual;
@@ -281,7 +281,7 @@ bool processRead(int matenum, Read& read, uint32_t next_id,  int& num_reads_chuc
 		// because it may contain some control characters such as "\" from quality values.
 		// Here, buf is only used for calculating the file offset
 		char buf[2048] = {0};
-		if (reads_format == FASTQ or (reads_format == FASTA && quals))
+		if (rd_format == FASTQ or (rd_format == FASTA && quals))
 		{
 			sprintf(buf, "@%u\n%s\n+%s\n%s\n",
 					next_id,
@@ -295,7 +295,7 @@ bool processRead(int matenum, Read& read, uint32_t next_id,  int& num_reads_chuc
 					read.name.c_str(),
 					read.qual.c_str());
 		}
-		else if (reads_format == FASTA)
+		else if (rd_format == FASTA)
 		{
 			string qual;
 			if (color)
@@ -342,25 +342,30 @@ void process_reads(vector<string>& reads_fnames, vector<FZPipe>& quals_files,
   int multimap_chucked = 0;
   int mate_num_reads_chucked = 0;
   int mate_multimap_chucked = 0;
-  int min_read_len = 20000000;
-  int max_read_len = 0;
-  int mate_min_read_len = 20000000;
-  int mate_max_read_len = 0;
+  int left_min_len = 20000000;
+  int left_max_len = 0;
+  int unpaired_min_len=left_min_len;
+  int unpaired_max_len=left_max_len;
+  int unpaired_num_chucked=0;
+  int unpaired_multimap_chucked=0;
+  int right_min_len = 20000000;
+  int right_max_len = 0;
   uint32_t next_id = 0;
+  uint32_t unpaired_count=0; //only used when PE_data + extra unpaired reads given
   uint32_t num_left = 0;
   uint32_t num_mates = 0;
+  bool PE_data = (mate_fnames.size() > 0);
 
   FILE* fw=NULL; //aux output file
   string outfname; //std_outfile after instancing template
   string mate_outfname;
   string idxfname; //index_outfile after instancing template
   string mate_idxfname;
-  bool have_mates = (mate_fnames.size() > 0);
 
   if (!aux_outfile.empty()) {
     fw=fopen(aux_outfile.c_str(), "w");
     if (fw==NULL)
-       err_die(ERR_FILE_CREATE,aux_outfile.c_str());
+       err_die(ERR_FILE_CREATE, aux_outfile.c_str());
     }
 
   FILE* fqindex = NULL; //fastq index
@@ -374,7 +379,7 @@ void process_reads(vector<string>& reads_fnames, vector<FZPipe>& quals_files,
   if (std_outfile.empty()) {
     fout=stdout;
     //for PE reads, flt_side will decide which side is printed (can't be both)
-    if (have_mates && flt_side==2)
+    if (PE_data && flt_side==2)
       err_die("Error: --flt-side option required for PE reads directed to stdout!\n");
     mate_fout=stdout;
   }
@@ -382,7 +387,7 @@ void process_reads(vector<string>& reads_fnames, vector<FZPipe>& quals_files,
 	//could be a template
 	if (std_outfile.find("%side%") != string::npos) {
 	  outfname=str_replace(std_outfile, "%side%", "left");
-	  if (have_mates)
+	  if (PE_data)
 	    mate_outfname=str_replace(std_outfile, "%side%", "right");
 	}
 	else {
@@ -390,7 +395,7 @@ void process_reads(vector<string>& reads_fnames, vector<FZPipe>& quals_files,
 	}
 	if (index_outfile.find("%side%") != string::npos) {
 	  idxfname=str_replace(index_outfile, "%side%", "left");
-	  if (have_mates)
+	  if (PE_data)
 	    mate_idxfname=str_replace(index_outfile, "%side%", "right");
 	}
 	else {
@@ -431,30 +436,35 @@ void process_reads(vector<string>& reads_fnames, vector<FZPipe>& quals_files,
 	Read read;
 	Read mate_read;
     ReadStream* reads=NULL;
+    ReadFormat rd_format=FASTX_AUTO;
 	ReadStream* mate_reads=NULL;
+    ReadFormat mate_rd_format=FASTX_AUTO;
 	FZPipe* fq=NULL;
 	FZPipe* mate_fq=NULL;
 	bool have_l_reads=(fi<reads_fnames.size());
-	bool have_r_reads=(have_mates && fi<mate_fnames.size());
+	bool have_r_reads=(PE_data && fi<mate_fnames.size());
 	if (have_l_reads) {
 	  if (quals)
 		fq = & quals_files[fi];
 	  reads=new ReadStream(reads_fnames[fi], fq, true);
+	  rd_format=reads->get_format();
 	}
 	if (have_r_reads) {
 	  if (quals)
 		 mate_fq = & mate_quals_files[fi];
 	  mate_reads=new ReadStream(mate_fnames[fi], mate_fq, true);
+	  mate_rd_format=mate_reads->get_format();
 	}
 
 	while (have_l_reads || have_r_reads) {
-	  if (have_l_reads && (have_l_reads=reads->get_direct(read, reads_format)))
+	  if (have_l_reads && (have_l_reads=reads->get_direct(read)))
 		 num_left++;
 	  // Get the next read from the file
 	  int matenum=0; // 0 = unpaired, 1 = left, 2 = right
-	  if (have_r_reads && (have_r_reads=mate_reads->get_direct(mate_read, reads_format)) ) {
+	  if (have_r_reads && (have_r_reads=mate_reads->get_direct(mate_read)) ) {
 		num_mates++;
 	  }
+	  bool have_reads = (have_l_reads || have_r_reads);
 	  if (have_l_reads && have_r_reads) {
 		matenum = 1; //read is first in a pair
 		if (have_l_reads && have_r_reads && !possible_mate_mismatch) {
@@ -478,19 +488,56 @@ void process_reads(vector<string>& reads_fnames, vector<FZPipe>& quals_files,
 		  }
 		} //mate check
 	  } //paired reads
-	  if (have_l_reads || have_r_reads) {
+	  else if (PE_data && have_reads) {
+         ++unpaired_count;
+         if (have_l_reads) --num_left;
+         else --num_mates; //if (have_r_reads)
+	  }
+	  if (have_reads) {
 		  //IMPORTANT: to keep paired reads in sync, this must be
 		  //incremented BEFORE any reads are chucked !
 		  ++next_id;
 	  }
-	  if ((flt_side & 1)==0 && have_l_reads)
-		  //for unpaired reads or left read in a pair
-	      processRead(matenum, read, next_id,  num_reads_chucked, multimap_chucked,
-		     wbam, fout, fqindex, min_read_len,  max_read_len,  fout_offset, readmap);
+	  int* min_rd_len=NULL, *max_rd_len=NULL,
+			  *num_rd_chucked=NULL, *num_multimap_chucked=NULL;
+	  if ((flt_side & 1)==0 && have_l_reads) {
+		  if (PE_data && matenum==0) {
+			  //extra unpaired read
+			  min_rd_len=&unpaired_min_len;
+			  max_rd_len=&unpaired_max_len;
+			  num_rd_chucked=&unpaired_num_chucked;
+			  num_multimap_chucked=&unpaired_multimap_chucked;
+		  }
+		  else { //paired read
+			  min_rd_len=&left_min_len;
+			  max_rd_len=&left_max_len;
+			  num_rd_chucked=&num_reads_chucked;
+			  num_multimap_chucked=&multimap_chucked;
+		  }
+	      processRead(matenum, read, rd_format, next_id,  *num_rd_chucked, *num_multimap_chucked,
+		     wbam, fout, fqindex, *min_rd_len,  *max_rd_len,  fout_offset, readmap);
+
+	  }
 	  if (flt_side>0 && have_r_reads) {
-		  matenum = have_l_reads ? 2 : 0;
-		  processRead(matenum, mate_read, next_id,  mate_num_reads_chucked, mate_multimap_chucked,
-			  mate_wbam, mate_fout, mate_fqindex, mate_min_read_len,  mate_max_read_len,
+		  //matenum = have_l_reads ? 2 : 0;
+		  if (have_l_reads) {
+			  matenum = 2;
+			  min_rd_len=&right_min_len;
+			  max_rd_len=&right_max_len;
+			  num_rd_chucked=&mate_num_reads_chucked;
+			  num_multimap_chucked=&mate_multimap_chucked;
+		  }
+		  else { //paired read
+			  //extra unpaired read
+			  matenum = 0;
+			  min_rd_len=&unpaired_min_len;
+			  max_rd_len=&unpaired_max_len;
+			  num_rd_chucked=&unpaired_num_chucked;
+			  num_multimap_chucked=&unpaired_multimap_chucked;
+		  }
+
+		  processRead(matenum, mate_read, mate_rd_format, next_id,  *num_rd_chucked, *num_multimap_chucked,
+			  mate_wbam, mate_fout, mate_fqindex, *min_rd_len,  *max_rd_len,
 			  mate_fout_offset, mate_readmap);
       }
     } //while !fr.isEof()
@@ -507,35 +554,42 @@ void process_reads(vector<string>& reads_fnames, vector<FZPipe>& quals_files,
 		  multimap_chucked, flt_reads_fnames[0].c_str());
   }
 
-  if (have_mates && (fout!=stdout || flt_side>0)) {
+  if (PE_data && (fout!=stdout || flt_side>0)) {
 	fprintf(stderr, "%u out of %u read mates have been filtered out\n",
 		mate_num_reads_chucked, num_mates);
 	if (readmap_loaded && mate_multimap_chucked)
 	  fprintf(stderr, "\t(%u mates filtered out due to %s)\n",
 		  mate_multimap_chucked, flt_reads_fnames[1].c_str());
   }
-
+  //we should also print to stderr stats related to the extra unpaired reads here, if any
   if (wbam) { delete wbam; }
   if (mate_wbam) { delete mate_wbam; }
   if (fout && fout!=stdout) fclose(fout);
   if (mate_fout) fclose(mate_fout);
   if (fw!=NULL) {
-	string side("");
-	if (have_mates)
-	   side="left_";
-    fprintf(fw, "%smin_read_len=%d\n", side.c_str(), min_read_len - (color ? 1 : 0));
-    fprintf(fw, "%smax_read_len=%d\n", side.c_str(), max_read_len - (color ? 1 : 0));
-    fprintf(fw, "%sreads_in =%d\n", side.c_str(), num_left);
-    fprintf(fw, "%sreads_out=%d\n", side.c_str(), num_left-num_reads_chucked);
-    if (have_mates) {
-      side="right_";
-      fprintf(fw, "%smin_read_len=%d\n", side.c_str(), mate_min_read_len - (color ? 1 : 0));
-      fprintf(fw, "%smax_read_len=%d\n", side.c_str(), mate_max_read_len - (color ? 1 : 0));
-      fprintf(fw, "%sreads_in =%d\n", side.c_str(), num_mates);
-      fprintf(fw, "%sreads_out=%d\n", side.c_str(), num_mates-mate_num_reads_chucked);
-    }
-    fclose(fw);
-    }
+	  string side("");
+	  if (PE_data)
+		  side="left_";
+	  fprintf(fw, "%smin_read_len=%d\n", side.c_str(), left_min_len - (color ? 1 : 0));
+	  fprintf(fw, "%smax_read_len=%d\n", side.c_str(), left_max_len - (color ? 1 : 0));
+	  fprintf(fw, "%sreads_in =%d\n", side.c_str(), num_left);
+	  fprintf(fw, "%sreads_out=%d\n", side.c_str(), num_left-num_reads_chucked);
+	  if (PE_data) {
+		  side="right_";
+		  fprintf(fw, "%smin_read_len=%d\n", side.c_str(), right_min_len - (color ? 1 : 0));
+		  fprintf(fw, "%smax_read_len=%d\n", side.c_str(), right_max_len - (color ? 1 : 0));
+		  fprintf(fw, "%sreads_in =%d\n", side.c_str(), num_mates);
+		  fprintf(fw, "%sreads_out=%d\n", side.c_str(), num_mates-mate_num_reads_chucked);
+		  if (unpaired_count) {
+			  side="unpaired_";
+			  fprintf(fw, "%smin_read_len=%d\n", side.c_str(), unpaired_min_len - (color ? 1 : 0));
+			  fprintf(fw, "%smax_read_len=%d\n", side.c_str(), unpaired_max_len - (color ? 1 : 0));
+			  fprintf(fw, "%sreads_in =%d\n", side.c_str(), unpaired_count);
+			  fprintf(fw, "%sreads_out=%d\n", side.c_str(), unpaired_count-unpaired_num_chucked);
+		  }
+	  }
+	  fclose(fw);
+  }
 
   if (fqindex) fclose(fqindex);
   if (mate_fqindex) fclose(mate_fqindex);
diff --git a/src/reads.cpp b/src/reads.cpp
index 73dc58b..d46debf 100644
--- a/src/reads.cpp
+++ b/src/reads.cpp
@@ -60,118 +60,35 @@ char* FLineReader::nextLine() {
    return buf;
 }
 
-void skip_lines(FLineReader& fr)
+ReadFormat skip_lines(FLineReader& fr)
 {
-  if (fr.fhandle() == NULL) return;
+  if (fr.fhandle() == NULL) return FASTX_AUTO;
   char* buf = NULL;
+  ReadFormat rfmt=FASTX_AUTO;
+  int lcount=0;
   while ((buf = fr.nextLine()) != NULL) {
-    if (buf[0] == '\0') continue;
-    if (buf[0] == '>' || buf[0] == '@')
-      {
-	fr.pushBack();
+	++lcount;
+	int i=0;
+    while (buf[i]!=0 && buf[i]==' ') ++i;
+    if (buf[i] == '\0') continue; //skip empty lines
+    if (buf[0] == '>') rfmt=FASTA;
+    if (buf[0] == '@') rfmt=FASTQ;
+    if (rfmt) {
+	   fr.pushBack();
+	   break;
+    }
+  /*
+  if (lcount==500) { //skipped 500 lines already, something is wrong here (large header?!?)
+	warn_msg("Warning: cannot determine file format! (large header?)\n");
 	break;
-      }
+   }
+ */
   }
-}
-
-bool next_fasta_record(FLineReader& fr,
-		       string& defline, 
-		       string& seq,
-		       ReadFormat reads_format)
-
-{
-  seq.clear();
-  defline.clear();
-  char* buf=NULL;
-  while ((buf=fr.nextLine())!=NULL) {
-    if (buf[0]==0) continue; //skip empty lines
-    if ((reads_format == FASTA && buf[0] == '>') || (reads_format == FASTQ && (buf[0] == '+' || buf[0] == '@'))) { //next record
-        if (seq.length()>0) { //current record ending
-           fr.pushBack();
-           return true;
-           }
-        defline=buf+1;
-        string::size_type space_pos = defline.find_first_of(" \t");
-        if (space_pos != string::npos) {
-            defline.resize(space_pos);
-            }
-        continue;
-        } //defline
-    // sequence line
-    seq.append(buf);
-    } //line reading loop
 
-  replace(seq.begin(), seq.end(), '.', color ? '4' : 'N'); //shouldn't really be needed for FASTA files
-  return !(seq.empty());
+  return rfmt;
 }
 
-bool next_fastq_record(FLineReader& fr,
-		       const string& seq,
-		       string& alt_name,
-		       string& qual,
-		       ReadFormat reads_format)
-{
-  alt_name.clear();
-  qual.clear();
-  char* fline=fr.nextLine();
-  if (fline==NULL) return false;
-  while (fline[0]==0) { //skip empty lines
-    fline=fr.nextLine();
-    if (fline==NULL) return false;
-    }
-  //must be on '+' line here
-  if (fline==NULL || (reads_format == FASTQ && fline[0] != '+') ||
-      (reads_format == FASTA && quals && fline[0] != '>')) {
-     err_exit("Error: '+' not found for fastq record %s\n",fline);
-     return false;
-     }
-  alt_name=fline+1;
-  string::size_type space_pos = alt_name.find_first_of(" \t");
-  if (space_pos != string::npos) alt_name.resize(space_pos);
-   //read qv line(s) now:
-  while ((fline=fr.nextLine())!=NULL) {
-    if (integer_quals)
-      {
-      vector<string> integer_qual_values;
-      tokenize(string(fline), " ", integer_qual_values);
-
-      string temp_qual;
-      for (size_t i = 0; i < integer_qual_values.size(); ++i)
-        {
-          int qual_value = atoi(integer_qual_values[i].c_str());
-          if (qual_value < 0) qual_value = 0;
-          temp_qual.push_back((char)(qual_value + 33));
-        }
-
-      qual.append(temp_qual);
-      }
-    else
-      qual.append(fline);
-      if (qual.length()>=seq.length()-1) break;
-     }
-  // final check
-  if (color) {
-     if (seq.length()==qual.length()) {
-        //discard first qv
-        qual=qual.substr(1);
-        }
-     if (seq.length()!=qual.length()+1) {
-        err_exit("Error: length of quality string does not match seq length (%d) for color read %s!\n",
-           seq.length(), alt_name.c_str());
-        }
-     }
-  else {
-    if (seq.length()!=qual.length()) {
-           err_exit("Error: qual string length (%d) differs from seq length (%d) for read %s!\n",
-               qual.length(), seq.length(), alt_name.c_str());
-           //return false;
-           }
-    }
-  //
-  return !(qual.empty());
-}
-
-bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format, 
+bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat rd_format,
                         FLineReader* frq) {
   /*
   if (fr.pushed_read)
@@ -185,8 +102,8 @@ bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format,
   char* buf=NULL;
   while ((buf=fr.nextLine())!=NULL) {
     if (buf[0]==0) continue; //skip empty lines
-    if ((reads_format == FASTA && buf[0] == '>') ||
-          (reads_format == FASTQ && (buf[0] == '+' || buf[0] == '@'))) { //next record
+    if ((rd_format == FASTA && buf[0] == '>') ||
+          (rd_format == FASTQ && (buf[0] == '+' || buf[0] == '@'))) { //next record
         if (read.seq.length()>0) { //current record ending
            fr.pushBack();
            break;
@@ -203,7 +120,7 @@ bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format,
     } //line reading loop
 
   replace(read.seq.begin(), read.seq.end(), '.', color ? '4' : 'N'); //shouldn't really be needed for FASTA files
-  if (reads_format != FASTQ && frq==NULL)
+  if (rd_format != FASTQ && frq==NULL)
       return (!read.seq.empty());
   if (frq==NULL) frq=&fr; //FASTQ
   //FASTQ or quals in a separate file -- now read quality values
@@ -214,8 +131,8 @@ bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format,
     if (buf==NULL) return false;
     }
   //must be on '+' line here
-  if (buf==NULL || (reads_format == FASTQ && buf[0] != '+') ||
-           (reads_format == FASTA && buf[0] != '>')) {
+  if (buf==NULL || (rd_format == FASTQ && buf[0] != '+') ||
+           (rd_format == FASTA && buf[0] != '>')) {
      err_exit("Error: beginning of quality values record not found! (%s)\n",buf);
      return false;
      }
@@ -605,11 +522,11 @@ void bam2Read(bam1_t *b, Read& rd, bool alt_name=false) {
 }
 
 
-bool ReadStream::next_read(QReadData& rdata, ReadFormat read_format) {
+bool ReadStream::next_read(QReadData& rdata) {
   while (read_pq.size()<ReadBufSize && !r_eof) {
     //keep the queue topped off
     Read rf;
-    if (get_direct(rf, read_format)) {
+    if (get_direct(rf)) {
       uint64_t id = (uint64_t)atol(rf.name.c_str());
       QReadData rdata(id, rf, last_b());
       read_pq.push(rdata);
@@ -623,7 +540,7 @@ bool ReadStream::next_read(QReadData& rdata, ReadFormat read_format) {
   return true;
 }
 
-bool ReadStream::get_direct(Read& r, ReadFormat read_format) {
+bool ReadStream::get_direct(Read& r) {
   if (fstream.file==NULL) return false;
   if (fstream.is_bam) {
 	 bool got_read=false;
@@ -650,7 +567,6 @@ bool ReadStream::get_direct(Read& r, ReadFormat read_format) {
 // reads must ALWAYS be requested in increasing order of their ID
 bool ReadStream::getRead(uint64_t r_id,
 		Read& read,
-		ReadFormat read_format,
 		bool strip_slash,
 		uint64_t begin_id,
 		uint64_t end_id,
@@ -672,7 +588,7 @@ bool ReadStream::getRead(uint64_t r_id,
 	read.clear();
 	while (!found) {
 		QReadData rdata;
-		if (!next_read(rdata, read_format))
+		if (!next_read(rdata))
 			break;
 		/*
     if (strip_slash) {
@@ -702,7 +618,52 @@ bool ReadStream::getRead(uint64_t r_id,
 			break;
 		}
 		if (rProc) { //skipped read processing (unmapped reads)
-			if (!rProc->process(rdata, found, is_unmapped))
+			if (!rProc->process(rdata, found)) //, is_unmapped))
+				//  rProc->process() should normally return TRUE
+				return false; //abort search for r_id, return "not found"
+		}
+	} //while target read id not found
+	return found;
+}
+
+bool ReadStream::getQRead(uint64_t r_id,
+		QReadData& qread,
+		uint64_t begin_id,
+		uint64_t end_id,
+		GetReadProc* rProc,
+		bool is_unmapped )
+ {
+	if (!fstream.file)
+		err_die("Error: calling ReadStream::getRead() with no file handle!");
+	if (r_id<last_id)
+		err_die("Error: ReadStream::getRead() called with out-of-order id#!");
+	last_id=r_id;
+	bool found=false;
+	qread.clear();
+	while (!found) {
+		QReadData rdata;
+		if (!next_read(rdata))
+			break;
+		if (rdata.id >= end_id)
+			return false;
+
+		if (rdata.id < begin_id)
+			continue; //silently skip until begin_id found
+		//does not trigger rProc->process() until begin_id
+		if (rdata.id == r_id)
+		{
+			qread=rdata; //it will be returned
+			found=true;
+		}
+		else if (rdata.id > r_id)
+		{ //can't find it, went too far
+			//only happens when reads [mates] were removed for some reason
+			//read_pq.push(make_pair(id, read));
+			read_pq.push(rdata);
+			break;
+		}
+		if (rProc) { //skipped read processing (unmapped reads)
+			if (!rProc->process(rdata, found)) //, is_unmapped))
 				//  rProc->process() should normally return TRUE
 				return false; //abort search for r_id, return "not found"
 		}
diff --git a/src/reads.h b/src/reads.h
index 7bf300e..9af4a7d 100644
--- a/src/reads.h
+++ b/src/reads.h
@@ -134,10 +134,8 @@ public:
     }
 };
 
-void skip_lines(FLineReader& fr);
-bool next_fasta_record(FLineReader& fr, string& defline, string& seq, ReadFormat reads_format);
-bool next_fastq_record(FLineReader& fr, const string& seq, string& alt_name, string& qual, ReadFormat reads_format);
-bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat reads_format=FASTQ,
+ReadFormat skip_lines(FLineReader& fr);
+bool next_fastx_read(FLineReader& fr, Read& read, ReadFormat rd_format=FASTQ,
                         FLineReader* frq=NULL);
 
 #define READSTREAM_BUF_SIZE 500000
@@ -147,9 +145,17 @@ struct QReadData { //read data for the priority queue
   Read read;
   char trashCode; //ZT tag value
   int8_t matenum; //mate number (1,2) 0 if unpaired
-  QReadData():id(0),read(),trashCode(0), matenum(0) { }
-  QReadData(uint64_t rid, Read& rd, bam1_t* bd=NULL):
-	 id(rid), read(rd), trashCode(0), matenum(0) {
+  bool um_written;
+  QReadData():id(0),read(),trashCode(0), matenum(0), um_written(false) { }
+  QReadData(uint64_t rid, Read& rd, bam1_t* bd=NULL) {
+  	  init(rid, rd, bd);
+  }
+  void init(uint64_t rid, Read& rd, bam1_t* bd=NULL) {
+	 id=rid;
+	 read=rd;
+	 trashCode=0;
+	 matenum=0;
+	 um_written=false;
      if (bd) {
        if (bd->core.flag & BAM_FREAD1) {
     	 matenum=1;
@@ -160,6 +166,14 @@ struct QReadData { //read data for the priority queue
      }
   }
 
+  void clear() {
+	  id=0;
+	  read.clear();
+	  trashCode=0;
+	  matenum=0;
+	  um_written=false;
+  }
+
 };
 
 
@@ -168,14 +182,48 @@ struct GetReadProc {
 	 GBamWriter* um_out; //skipped (unmapped) reads will be written here
 	 int64_t* unmapped_counter;
 	 int64_t* multimapped_counter;
-   //char um_code;
-	GetReadProc(GBamWriter* bamw=NULL, int64_t* um_counter=NULL, int64_t* mm_counter=NULL):
-		um_out(bamw), unmapped_counter(um_counter), multimapped_counter(mm_counter) { }
-	virtual bool process(QReadData& rdata, bool& found, bool is_unmapped) {
+
+	 int64_t* u_unmapped_counter;
+	 int64_t* u_multimapped_counter;
+
+	 //char um_code;
+	GetReadProc(GBamWriter* bamw=NULL, int64_t* um_counter=NULL, int64_t* mm_counter=NULL,
+			int64_t* u_um_counter=NULL, int64_t* u_mm_counter=NULL):
+		um_out(bamw), unmapped_counter(um_counter), multimapped_counter(mm_counter),
+		u_unmapped_counter(u_um_counter), u_multimapped_counter(u_mm_counter)
+		{ }
+	virtual bool process(QReadData& rdata, bool& found) { //, bool is_unmapped) {
 	//should return True  - if it returns FALSE it will cause getRead() to abort
 	//(stops looking for target readId in the stream) and to return false (="not found")
 		return true;
 	}
+	void writeUnmapped(QReadData& rdata) {
+		if (rdata.um_written) return;
+		string rname(rdata.read.alt_name);
+		size_t slash_pos=rname.rfind('/');
+		if (slash_pos!=string::npos && rname.length()-slash_pos<4)
+			rname.resize(slash_pos);
+		GBamRecord bamrec(rname.c_str(), -1, 0, false, rdata.read.seq.c_str(),
+				NULL, rdata.read.qual.c_str());
+		if (rdata.matenum) {
+			bamrec.set_flag(BAM_FPAIRED);
+			if (rdata.matenum==1) bamrec.set_flag(BAM_FREAD1);
+			else bamrec.set_flag(BAM_FREAD2);
+		}
+		//if (found && um_code && !rdata.trashCode) {
+		//rdata.trashCode=um_code;
+		//}
+		if (rdata.trashCode) {
+			//multi-mapped reads did not really QC-fail
+			//should also not be written to unmapped.bam
+			bamrec.add_aux("ZT", 'A', 1, (uint8_t*)&rdata.trashCode);
+			//if (rdata.trashCode!='M')
+				//bamrec.set_flag(BAM_FQCFAIL); //to be excluded from further processing?
+		}
+		um_out->write(&bamrec);
+		rdata.um_written=true;
+	}
+
 	virtual ~GetReadProc() { }
 };
 
@@ -184,6 +232,7 @@ class ReadStream {
 	FLineReader* flquals;
 	FLineReader* flseqs;
 	bool stream_copy;
+	ReadFormat read_format; //should be guessed when opening the stream
 	bam1_t* b;
 	bool bam_alt_name; //from BAM files, look for alt_name tag to retrieve the original read name
 	bool bam_ignoreQC; //from BAM files, ignore QC flag (return the next read even if it has QC fail)
@@ -203,20 +252,20 @@ class ReadStream {
          ReadOrdering > read_pq;
     uint64_t last_id; //keep track of last requested ID, for consistency check
     bool r_eof;
-    bool next_read(QReadData& rdata, ReadFormat read_format=FASTQ); //get top read from the queue
+    bool next_read(QReadData& rdata); //get top read from the queue
 
   public:
-    ReadStream(int bufsize=READSTREAM_BUF_SIZE):flquals(NULL), flseqs(NULL), stream_copy(false), b(NULL),
+    ReadStream(int bufsize=READSTREAM_BUF_SIZE):flquals(NULL), flseqs(NULL), stream_copy(false), read_format(FASTX_AUTO), b(NULL),
        bam_alt_name(false), bam_ignoreQC(false), fstream(), fquals(NULL),ReadBufSize(bufsize), read_pq(),
        last_id(0), r_eof(false) {   }
 
     ReadStream(const string& fname, FZPipe* pquals=NULL, bool guess_packer=false):flquals(NULL),
-    	flseqs(NULL), stream_copy(false), b(NULL), bam_alt_name(false), bam_ignoreQC(false), fstream(),
+    	flseqs(NULL), stream_copy(false), read_format(FASTX_AUTO), b(NULL), bam_alt_name(false), bam_ignoreQC(false), fstream(),
     	fquals(pquals), ReadBufSize(READSTREAM_BUF_SIZE), read_pq(), last_id(0), r_eof(false) {
       init(fname, pquals, guess_packer);
     }
     ReadStream(FZPipe& f_stream, FZPipe* pquals=NULL):flquals(NULL),
-    	 flseqs(NULL), stream_copy(true), b(NULL), bam_alt_name(false), bam_ignoreQC(false), fstream(f_stream),
+    	 flseqs(NULL), stream_copy(true), read_format(FASTX_AUTO), b(NULL), bam_alt_name(false), bam_ignoreQC(false), fstream(f_stream),
     	 fquals(pquals), ReadBufSize(READSTREAM_BUF_SIZE), read_pq(), last_id(0), r_eof(false) {
       //init(f_stream, pquals);
       if (fstream.is_bam) {
@@ -224,7 +273,10 @@ class ReadStream {
       }
       else  {
         flseqs=new FLineReader(fstream.file);
-        skip_lines(*flseqs);
+        read_format=skip_lines(*flseqs);
+        if (read_format==FASTX_AUTO)
+          warn_msg("Warning: could not determine format for file '%s'! (large header?)\n", fstream.filename.c_str());
+
       }
       fquals=pquals;
       if (fquals) {
@@ -239,29 +291,33 @@ class ReadStream {
     void ignoreQC(bool v=true) {
       bam_ignoreQC=v;
     }
-
+    ReadFormat get_format() { return read_format; }
     void init(const string& fname, FZPipe* pquals=NULL, bool guess_packer=false) {
-        if (fname.empty()) return;
-        if (fstream.openRead(fname, guess_packer)==NULL) {
+       if (fname.empty()) return;
+       if (fstream.openRead(fname, guess_packer)==NULL) {
           fprintf(stderr, "Warning: couldn't open file %s\n",fname.c_str());
           return;
-        }
-        if (fstream.is_bam) {
+       }
+       if (fstream.is_bam) {
           if (b==NULL) {
         	b = bam_init1();
            }
-        }
-        else  {
+       }
+       else  {
           if (b) { bam_destroy1(b); b=NULL; }
           flseqs=new FLineReader(fstream.file);
-          skip_lines(*flseqs);
-        }
-        fquals=pquals;
-        if (fquals) {
+          read_format=skip_lines(*flseqs);
+          if (read_format==FASTX_AUTO)
+            warn_msg("Warning: could not determine format for file '%s'! (large header?)\n", fstream.filename.c_str());
+
+       }
+       fquals=pquals;
+       if (fquals) {
           flquals=new FLineReader(fquals->file);
           skip_lines(*flquals);
-          }
-        }
+       }
+    }
+
     void init(FZPipe& f_stream, FZPipe* pquals=NULL) {
         fstream=f_stream; //Warning - original copy may end up with invalid (closed) file handle
         stream_copy=true;
@@ -277,17 +333,19 @@ class ReadStream {
         else  {
           if (b) { bam_destroy1(b); b=NULL; }
           flseqs=new FLineReader(fstream.file);
-          skip_lines(*flseqs);
+          read_format=skip_lines(*flseqs);
+          if (read_format==FASTX_AUTO)
+            warn_msg("Warning: could not determine format for file '%s'! (large header?)\n", f_stream.filename.c_str());
         }
         fquals=pquals;
         if (fquals) {
           flquals=new FLineReader(fquals->file);
           skip_lines(*flquals);
-          }
         }
+    }
 
     //unbuffered reading from stream
-    bool get_direct(Read& read, ReadFormat read_format=FASTQ);
+    bool get_direct(Read& read);
     bool isBam() { return fstream.is_bam; }
     bam1_t* last_b() {//return the latest SAM record data fetched by get_direct()
       //must only be called after get_direct()
@@ -300,7 +358,7 @@ class ReadStream {
 
     //read_ids must ALWAYS be requested in increasing order
     bool getRead(uint64_t read_id, Read& read,
-		 ReadFormat read_format=FASTQ,
+		 //ReadFormat read_format=FASTQ,
 		 bool strip_slash=false,
 		 uint64_t begin_id = 0,
 		 uint64_t end_id=std::numeric_limits<uint64_t>::max(),
@@ -314,13 +372,22 @@ class ReadStream {
 		 int64_t* multimapped_counter=NULL
 		 */
 		 );
+    bool getQRead(uint64_t read_id,
+    		QReadData& read,
+    		uint64_t begin_id = 0,
+    		uint64_t end_id=std::numeric_limits<uint64_t>::max(),
+    		GetReadProc* rProc=NULL,
+    		bool is_unmapped=false //the target read, when found is also written by
+		                        //rProc into the unmapped BAM file
+		    );
+
 
     void rewind() {
       fstream.rewind();
       clear();
       if (flseqs) {
         flseqs->reset(fstream);
-        skip_lines(*flseqs);
+        skip_lines(*flseqs); //read_format detected earlier
         }
       if (flquals) {
     	flquals->reset(*fquals);
diff --git a/src/sam_juncs.cpp b/src/sam_juncs.cpp
index ea80e12..4c21079 100644
--- a/src/sam_juncs.cpp
+++ b/src/sam_juncs.cpp
@@ -118,8 +118,6 @@ int main(int argc, char** argv)
 	fprintf(stderr, "sam_juncs v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION); 
 	fprintf(stderr, "---------------------------------------\n");
 	
-	reads_format = FASTQ;
-	
     int parse_ret = parse_options(argc, argv, print_usage);
     if (parse_ret)
         return parse_ret;
diff --git a/src/segment_juncs.cpp b/src/segment_juncs.cpp
index 80fb3b7..e3acc46 100644
--- a/src/segment_juncs.cpp
+++ b/src/segment_juncs.cpp
@@ -450,23 +450,7 @@ void count_read_mers(string& reads_file, size_t half_splice_mer_len)
 	size_t mer_table_size = 1 << ((splice_mer_len)<<1);
 	extension_counts.resize(mer_table_size);
 	ReadStream readstream(reads_file);
-    //FLineReader fr(reads_file);
-	//while(!feof(reads_file))
-	while (readstream.get_direct(read, reads_format)) {
-	/*
-    while (!fr.isEof())
-	{
-		read.clear();
-		
-		// Get the next read from the file
-		if (!next_fasta_record(fr, read.name, read.seq, reads_format))
-		  break;
-		if (reads_format == FASTQ)
-		{
-		  if (!next_fastq_record(fr, read.seq, read.alt_name, read.qual, reads_format))
-		    break;
-		}
-      */
+	while (readstream.get_direct(read)) {
 		if (color && !readstream.isBam())
 		  // erase the primer and the adjacent color
 		  read.seq.erase(0, 2);
@@ -531,23 +515,7 @@ void store_read_mers(string& reads_file, size_t half_splice_mer_len)
 	
 	size_t num_indexed_reads = 0;
 	ReadStream readstream(reads_file);
-	while (readstream.get_direct(read, reads_format)) {
-	/*
-	FLineReader fr(reads_file);
-	//while(!feof(reads_file))
-	while(!fr.isEof())
-	{
-		read.clear();
-		
-		// Get the next read from the file
-		if (!next_fasta_record(fr, read.name, read.seq, reads_format))
-		  break;
-		if (reads_format == FASTQ)
-		{
-		  if (!next_fastq_record(fr, read.seq, read.alt_name, read.qual, reads_format))
-		    break;
-		}
-     */
+	while (readstream.get_direct(read)) {
 		if (color && !readstream.isBam())
 		  // erase the primer and the adjacent color
 		  read.seq.erase(0, 2);
@@ -2530,7 +2498,7 @@ void detect_small_insertion(RefSequenceTable& rt,
        bestInsertPosition + discrepancy <= (int)length(left_read_sequence)){
       String<char> insertedSequence = seqan::infix(left_read_sequence, bestInsertPosition, bestInsertPosition + discrepancy);
       if(color)
-	insertedSequence = convert_color_to_bp(genomic_sequence_temp[bestInsertPosition - begin_offset + end_offset - 1], insertedSequence);
+    	  insertedSequence = convert_color_to_bp(genomic_sequence_temp[bestInsertPosition - begin_offset + end_offset - 1], insertedSequence);
       insertions.insert(Insertion(leftHit.ref_id(),
 				  leftHit.left() + bestInsertPosition - 1 + end_offset,
 				  seqan::toCString(insertedSequence)));
diff --git a/src/tophat-fusion-post b/src/tophat-fusion-post
index 2c13592..20a8f44 100755
--- a/src/tophat-fusion-post
+++ b/src/tophat-fusion-post
@@ -311,6 +311,7 @@ def map_fusion_kmer(bwt_idx_prefix, params, sample_update = False):
     fusion_kmer_file_name = output_dir + "fusion_seq.map"
 
     if not os.path.exists(fusion_kmer_file_name) or \
+           os.stat(fusion_kmer_file_name).st_size <= 0 or \
            sample_update:
         get_fusion_seq()
         cmd = ['bowtie', '-p', '8', '-a', '-n', '3', '-m', '100', bwt_idx_prefix, '-f', '%sfusion_seq.fa' % output_dir]
@@ -1897,10 +1898,6 @@ def generate_html(params):
         for i in range(min(max_num_fusions, len(cluster_temp_list))):
             do_not_add = False
             indices = cluster_temp_list[i]["index"]
-
-            if num_samples(indices) > 5:
-                do_not_add = True
-
             if not do_not_add:
                 def cmp(a, b):
                     return int(fusion_list[b]["score"] - fusion_list[a]["score"])
@@ -2588,7 +2585,7 @@ def prog_path(program):
 
 
 def get_version():
-   return "2.0.9"
+   return "2.1.0"
 
 
 def main(argv=None):
diff --git a/src/tophat.py b/src/tophat.py
index d1685c5..94f5848 100755
--- a/src/tophat.py
+++ b/src/tophat.py
@@ -415,7 +415,7 @@ class TopHatParams:
                      color,
                      library_type,
                      seed_length,
-                     reads_format,
+                     # reads_format,
                      mate_inner_dist,
                      mate_inner_dist_std_dev,
                      read_group_id,
@@ -433,7 +433,7 @@ class TopHatParams:
             self.color = color
             self.library_type = library_type
             self.seed_length = seed_length
-            self.reads_format = reads_format
+            # self.reads_format = reads_format
             self.mate_inner_dist = mate_inner_dist
             self.mate_inner_dist_std_dev = mate_inner_dist_std_dev
             self.read_group_id = read_group_id
@@ -682,7 +682,7 @@ class TopHatParams:
                                            False,               # SOLiD - color space
                                            "",                  # library type (e.g. "illumina-stranded-pair-end")
                                            None,                # seed_length
-                                           "fastq",             # quality_format
+                                         # "fastq",             # quality_format
                                            None,                # mate inner distance
                                            20,                  # mate inner dist std dev
                                            None,                # read group id
@@ -714,6 +714,7 @@ class TopHatParams:
         self.transcriptome_only = False
         self.transcriptome_index = None
         self.transcriptome_outdir = None
+        self.transcriptome_buildonly = None
         self.raw_junctions = None
         self.resume_dir = None
         self.find_novel_juncs = True
@@ -1138,7 +1139,10 @@ class TopHatParams:
             tmp_dir = custom_tmp_dir
             sam_header = tmp_dir + "stub_header.sam"
         if len(args) < 2 and not self.resume_dir:
-            raise Usage(use_message)
+            if len(args) == 1 and self.transcriptome_index and self.gff_annotation:
+              self.transcriptome_buildonly = True
+            else:
+              raise Usage(use_message)
 
         if self.read_realign_edit_dist == None:
             self.read_realign_edit_dist = self.read_edit_dist + 1
@@ -1816,7 +1820,7 @@ class ZWriter:
 # to determines the file format
 def check_reads_format(params, reads_files):
     #seed_len = params.read_params.seed_length
-    fileformat = params.read_params.reads_format
+    #fileformat = params.read_params.reads_format
 
     observed_formats = set([])
     # observed_scales = set([])
@@ -1842,25 +1846,27 @@ def check_reads_format(params, reads_files):
                 max_seed_len = max(seq_len, max_seed_len)
         zf.close()
         observed_formats.add(freader.format)
-    if len(observed_formats) > 1:
-        die("Error: TopHat requires all reads be either FASTQ or FASTA.  Mixing formats is not supported.")
+#     if len(observed_formats) > 1:
+#         die("Error: TopHat requires all reads be either FASTQ or FASTA.  Mixing formats is not supported.")
     fileformat=list(observed_formats)[0]
-    #if seed_len != None:
-    #    seed_len = max(seed_len, max_seed_len)
-    #else:
-    #    seed_len = max_seed_len
-    #print >> sys.stderr, "\tmin read length: %dbp, max read length: %dbp" % (min_seed_len, max_seed_len)
-    th_logp("\tformat:\t\t %s" % fileformat)
-    if fileformat == "fastq":
-        quality_scale = "phred33 (default)"
-        if params.read_params.solexa_quals and not params.read_params.phred64_quals:
-            quality_scale = "solexa33 (reads generated with GA pipeline version < 1.3)"
-        elif params.read_params.phred64_quals:
-            quality_scale = "phred64 (reads generated with GA pipeline version >= 1.3)"
-        th_logp("\tquality scale:\t %s" % quality_scale)
-    elif fileformat == "fasta":
-        if params.read_params.color:
-            params.read_params.integer_quals = True
+#     #if seed_len != None:
+#     #    seed_len = max(seed_len, max_seed_len)
+#     #else:
+#     #    seed_len = max_seed_len
+#     #print >> sys.stderr, "\tmin read length: %dbp, max read length: %dbp" % (min_seed_len, max_seed_len)
+#     th_logp("\tformat:\t\t %s" % fileformat)
+#     if fileformat == "fastq":
+#         quality_scale = "phred33 (default)"
+#         if params.read_params.solexa_quals and not params.read_params.phred64_quals:
+#             quality_scale = "solexa33 (reads generated with GA pipeline version < 1.3)"
+#         elif params.read_params.phred64_quals:
+#             quality_scale = "phred64 (reads generated with GA pipeline version >= 1.3)"
+#         th_logp("\tquality scale:\t %s" % quality_scale)
+#     elif fileformat == "fasta":
+#         if params.read_params.color:
+#             params.read_params.integer_quals = True
+    if params.read_params.color and fileformat == "fasta":
+         params.read_params.integer_quals = True
 
     #print seed_len, format, solexa_scale
     #NOTE: seed_len will be re-evaluated later by prep_reads
@@ -1870,9 +1876,9 @@ def check_reads_format(params, reads_files):
                                    params.read_params.integer_quals,
                                    params.read_params.color,
                                    params.read_params.library_type,
-                                   #seed_len,
+                                   # seed_len,
                                    params.read_params.seed_length,
-                                   fileformat,
+                                   # fileformat,
                                    params.read_params.mate_inner_dist,
                                    params.read_params.mate_inner_dist_std_dev,
                                    params.read_params.read_group_id,
@@ -1970,10 +1976,10 @@ def prep_reads_cmd(params, l_reads_list, l_quals_list=None, r_reads_list=None, r
 
   prep_cmd.extend(params.cmd())
 
-  if params.read_params.reads_format == "fastq":
-      prep_cmd += ["--fastq"]
-  elif params.read_params.reads_format == "fasta":
-      prep_cmd += ["--fasta"]
+#   if params.read_params.reads_format == "fastq":
+#       prep_cmd += ["--fastq"]
+#   elif params.read_params.reads_format == "fasta":
+#       prep_cmd += ["--fasta"]
   if hits_to_filter:
     prep_cmd += ["--flt-hits=" + ",".join(hits_to_filter)]
   if aux_file:
@@ -2094,7 +2100,7 @@ def bowtie(params,
            bwt_idx_prefix,
            sam_headers,
            reads_list,
-           reads_format,
+           # reads_format,
            num_mismatches,
            gap_length,
            edit_dist,
@@ -2174,10 +2180,10 @@ def bowtie(params,
     # Launch Bowtie
     try:
         bowtie_cmd = [bowtie_path]
-        if reads_format == "fastq":
-            bowtie_cmd += ["-q"]
-        elif reads_format == "fasta":
-            bowtie_cmd += ["-f"]
+#         if reads_format == "fastq":
+#             bowtie_cmd += ["-q"]
+#         elif reads_format == "fasta":
+#             bowtie_cmd += ["-f"]
         if params.read_params.color:
             bowtie_cmd += ["-C", "--col-keepends"]
 
@@ -2188,8 +2194,8 @@ def bowtie(params,
            unzip_cmd=[ prog_path('bam2fastx'), "--all" ]
            if params.read_params.color:
                unzip_cmd.append("--color")
-           if reads_format:
-              unzip_cmd.append("--" + reads_format)
+           #if reads_format:
+           #   unzip_cmd.append("--" + reads_format)
            unzip_cmd+=[reads_list[0]]
 
         if use_zpacker and (unzip_cmd is None):
@@ -2240,7 +2246,8 @@ def bowtie(params,
         if not params.bowtie2:
            fix_map_cmd += ["--bowtie1"]
         if multihits_out != None:
-           fix_map_cmd += ["--aux-outfile", params.preflt_data[multihits_out].multihit_reads]
+           if params.bowtie2:
+               fix_map_cmd += ["--aux-outfile", params.preflt_data[multihits_out].multihit_reads]
            fix_map_cmd += ["--max-multihits", str(params.max_hits)]
         if t_mapping:
            out_bam = "-" # we'll pipe into map2gtf
@@ -3046,7 +3053,6 @@ def junctions_from_segments(params,
                             right_reads_map,
                             right_seg_maps,
                             unmapped_reads,
-                            reads_format,
                             ref_fasta):
     # if left_reads_map != left_seg_maps[0]:
 
@@ -3276,18 +3282,19 @@ def map2gtf(params, genome_sam_header_filename, ref_fasta, left_reads, right_rea
        m2g_bwt_idx = params.transcriptome_index
        th_log("Using pre-built transcriptome data..")
     else:
-       th_log("Building transcriptome data files..")
        if params.transcriptome_outdir:
          t_out_dir=params.transcriptome_outdir+"/"
+       th_log("Building transcriptome data files "+t_out_dir+gtf_name)
        m2g_ref_name  = t_out_dir + gtf_name
        m2g_ref_fasta = gtf_to_fasta(params, params.gff_annotation, ref_fasta, m2g_ref_name)
        m2g_bwt_idx = build_idx_from_fa(params.bowtie2, m2g_ref_fasta, t_out_dir, params.read_params.color)
        params.transcriptome_index = m2g_bwt_idx
-
+    if params.transcriptome_buildonly:
+       return
     transcriptome_header_filename = get_index_sam_header(params, m2g_bwt_idx)
 
     mapped_gtf_list = []
-    unmapped_gtf_list = []
+    unmapped_gtf_list = []    
     # do the initial mapping in GTF coordinates
     for reads in [left_reads, right_reads]:
         if reads == None or os.path.getsize(reads) < 25 :
@@ -3305,7 +3312,6 @@ def map2gtf(params, genome_sam_header_filename, ref_fasta, left_reads, right_rea
                                             m2g_bwt_idx,
                                             [transcriptome_header_filename, genome_sam_header_filename],
                                             [reads],
-                                            "fastq",
                                             params.read_mismatches,
                                             params.read_gap_length,
                                             params.read_edit_dist,
@@ -3508,7 +3514,6 @@ def spliced_alignment(params,
                                                    bwt_idx_prefix,
                                                    sam_header_filename,
                                                    [reads],
-                                                   "fastq",
                                                    params.read_mismatches,
                                                    params.read_gap_length,
                                                    params.read_edit_dist,
@@ -3546,7 +3551,6 @@ def spliced_alignment(params,
                                              bwt_idx_prefix,
                                              sam_header_filename,
                                              [seg],
-                                             "fastq",
                                              params.segment_mismatches,
                                              params.segment_mismatches,
                                              params.segment_mismatches,
@@ -3604,7 +3608,6 @@ def spliced_alignment(params,
                                         right_reads_map,
                                         right_seg_maps,
                                         unmapped_reads,
-                                        "fastq",
                                         ref_fasta)
 
         if not params.system_params.keep_tmp:
@@ -3704,7 +3707,6 @@ def spliced_alignment(params,
                                                  tmp_dir + junc_idx_prefix,
                                                  juncs_bwt_samheader,
                                                  [seg],
-                                                 "fastq",
                                                  params.segment_mismatches,
                                                  params.segment_mismatches,
                                                  params.segment_mismatches,
@@ -3837,7 +3839,9 @@ def main(argv=None):
         params.check()
 
         bwt_idx_prefix = args[0]
-        left_reads_list = args[1]
+        left_reads_list = None
+        if len(args)>1:
+          left_reads_list = args[1]
         left_quals_list, right_quals_list = None, None
         if (not params.read_params.quals and len(args) > 2) or (params.read_params.quals and len(args) > 3):
             if params.read_params.mate_inner_dist == None:
@@ -3854,6 +3858,7 @@ def main(argv=None):
                 left_quals_list = args[2]
 
         start_time = datetime.now()
+        
         prepare_output_dir()
         init_logger(logging_dir + "tophat.log")
 
@@ -3861,7 +3866,10 @@ def main(argv=None):
         if resumeStage>0:
            th_log("Resuming TopHat run in directory '"+output_dir+"' stage '"+stageNames[resumeStage]+"'")
         else:
-           th_log("Beginning TopHat run (v"+get_version()+")")
+           if params.transcriptome_buildonly:
+             th_log("Building transcriptome files with TopHat v"+get_version())
+           else:
+             th_log("Beginning TopHat run (v"+get_version()+")")
         th_logp("-----------------------------------------------")
 
         global run_log
@@ -3913,7 +3921,11 @@ def main(argv=None):
            #end @ transcriptome_index given
 
         (ref_fasta, ref_seq_dict) = check_index(bwt_idx_prefix, params.bowtie2)
-
+        if params.transcriptome_buildonly:
+             map2gtf(params, "", ref_fasta, [], [])
+             th_logp("-----------------------------------------------")
+             th_log("Transcriptome files prepared. This was the only task requested.")
+             return
         if currentStage >= resumeStage:
            th_log("Generating SAM header for "+bwt_idx_prefix)
         # we need to provide another name for this sam header as genome and transcriptome may have the same prefix.
@@ -3978,7 +3990,7 @@ def main(argv=None):
                th_log("Pre-filtering multi-mapped "+sides[ri]+" reads")
                rdlist=reads_list.split(',')
                bwt=bowtie(params, bwt_idx_prefix, sam_header_filename, rdlist,
-                          params.read_params.reads_format,
+                          # params.read_params.reads_format,
                           params.read_mismatches,
                           params.read_gap_length,
                           params.read_edit_dist,
@@ -4059,7 +4071,7 @@ def main(argv=None):
 
 
         th_logp("-----------------------------------------------")
-        th_log("A summary of the alignment counts can be found in %salign_summary.txt" % output_dir);
+        th_log("A summary of the alignment counts can be found in %salign_summary.txt" % output_dir)
         th_log("Run complete: %s elapsed" %  formatTD(duration))
 
     except Usage, err:
diff --git a/src/tophat_reports.cpp b/src/tophat_reports.cpp
index 9d0a927..91e39df 100644
--- a/src/tophat_reports.cpp
+++ b/src/tophat_reports.cpp
@@ -126,38 +126,34 @@ char read_best_alignments(const HitsForRead& hits_for_read,
 		ret_code |= 16;
 		return ret_code; //too many hits
 	}*/
- unsigned int nhits=0;
+	unsigned int nhits=0;
 	for (size_t i = 0; i < hits.size(); ++i)
 	{
-			if (hits[i].mismatches() > read_mismatches ||
-					hits[i].gap_length() > read_gap_length ||
-					hits[i].edit_dist() > read_edit_dist)
-				continue;
-			ret_code |= 1; //read has a valid mapping
-			nhits++;
-			if (nhits>1) ret_code |= 4; //read has multiple valid mappings
-			if (nhits>max_multihits) ret_code |= 16; //read has too many valid mappings
-			BowtieHit hit = hits[i];
-			AlignStatus align_status(hit, gtf_junctions,
-					junctions, insertions, deletions, fusions, coverage);
-			hit.alignment_score(align_status._alignment_score);
-			if (report_secondary_alignments || !final_report)
+		if (hits[i].mismatches() > read_mismatches ||
+				hits[i].gap_length() > read_gap_length ||
+				hits[i].edit_dist() > read_edit_dist)
+			continue;
+		BowtieHit hit = hits[i];
+		AlignStatus align_status(hit, gtf_junctions,
+				junctions, insertions, deletions, fusions, coverage);
+		hit.alignment_score(align_status._alignment_score);
+		if (report_secondary_alignments || !final_report)
+		{
+			best_hits.hits.push_back(hit);
+		}
+		else
+		{
+			// Is the new status better than the current best one?
+			if (best_hits.hits.size() == 0 || cmp_read_alignment()(hit, best_hits.hits[0]))
 			{
+				best_hits.hits.clear();
 				best_hits.hits.push_back(hit);
 			}
-			else
+			else if (!cmp_read_alignment()(best_hits.hits[0], hit)) // is it just as good?
 			{
-				// Is the new status better than the current best one?
-				if (best_hits.hits.size() == 0 || cmp_read_alignment()(hit, best_hits.hits[0]))
-				{
-					best_hits.hits.clear();
-					best_hits.hits.push_back(hit);
-				}
-				else if (!cmp_read_alignment()(best_hits.hits[0], hit)) // is it just as good?
-				{
-					best_hits.hits.push_back(hit);
-				}
+				best_hits.hits.push_back(hit);
 			}
+		}
 	}
 
 	// due to indel alignments, there may be alignments with the same location
@@ -167,16 +163,16 @@ char read_best_alignments(const HitsForRead& hits_for_read,
 
 	if ((report_secondary_alignments || !final_report) && best_hits.hits.size() > 0)
 	{
-			sort(best_hits.hits.begin(), best_hits.hits.end(), cmp_read_alignment());
+		sort(best_hits.hits.begin(), best_hits.hits.end(), cmp_read_alignment());
 	}
 
-	if (final_report)
-	{
-			if (suppress_hits && best_hits.hits.size() > max_multihits)
+	if (final_report) {
+		if (best_hits.hits.size() > max_multihits) {
+			ret_code |= 16; //read has too many valid mappings
+			if (suppress_hits)
 				best_hits.hits.clear();
-
-			if (best_hits.hits.size() > max_multihits)
-			{
+			else {
+				//if (best_hits.hits.size() > max_multihits)
 				// there may be several alignments with the same alignment scores,
 				// all of which we can not include because of this limit.
 				// let's pick up some of them randomly up to this many max multihits.
@@ -208,6 +204,11 @@ char read_best_alignments(const HitsForRead& hits_for_read,
 
 				best_hits.hits.erase(best_hits.hits.begin() + max_multihits, best_hits.hits.end());
 			}
+		}
+		if ((nhits=best_hits.hits.size())>0) {
+			ret_code |= 1; //read has a valid mapping
+			if (nhits>1) ret_code |= 4; //read has multiple mappings
+		}
 	}
 	return ret_code;
 }
@@ -314,16 +315,25 @@ struct SAlignStats {
 	int64_t num_unmapped_left; //total left reads unmapped, or mapped in too many places (!)
 	int64_t num_aligned_left_multi; //total left reads mapped in more than 1 place
 	int64_t num_aligned_left_xmulti; //total left reads mapped in too many places (> max_multihits)
+
 	int64_t num_aligned_right; //total right reads aligned
 	int64_t num_unmapped_right; //total right reads unmapped, or mapped in too many places (!)
 	int64_t num_aligned_right_multi; //total right reads in more than 1 place
 	int64_t num_aligned_right_xmulti; //total right reads mapped in too many places (> max_multihits)
+
 	int64_t num_aligned_pairs; //total pairs aligned
 	int64_t num_aligned_pairs_multi; //total pairs aligned in more than 1 place
 	int64_t num_aligned_pairs_disc; //total pairs aligned discordantly only
+
+	int64_t num_aligned_unpaired; //total unpaired reads aligned, when mixed with PE
+	int64_t num_unmapped_unpaired; //total right reads unmapped, or mapped in too many places (!)
+	int64_t num_aligned_unpaired_multi;
+	int64_t num_aligned_unpaired_xmulti; //total right reads mapped in too many places (> max_multihits)
+
 	SAlignStats():num_aligned_left(0), num_unmapped_left(0), num_aligned_left_multi(0), num_aligned_left_xmulti(0), num_aligned_right(0),
 			num_unmapped_right(0), num_aligned_right_multi(0), num_aligned_right_xmulti(0), num_aligned_pairs(0), num_aligned_pairs_multi(0),
-			num_aligned_pairs_disc(0) { }
+			num_aligned_pairs_disc(0),  num_aligned_unpaired(0), num_unmapped_unpaired(0),
+			num_aligned_unpaired_multi(0), num_aligned_unpaired_xmulti(0) { }
 	void add(SAlignStats& a) {
 		num_aligned_left+=a.num_aligned_left;
 		num_unmapped_left+=a.num_unmapped_left;
@@ -336,6 +346,10 @@ struct SAlignStats {
 		num_aligned_pairs+=a.num_aligned_pairs;
 		num_aligned_pairs_multi+=a.num_aligned_pairs_multi;
 		num_aligned_pairs_disc+=a.num_aligned_pairs_disc;
+		num_aligned_unpaired+=a.num_aligned_unpaired;
+		num_unmapped_unpaired+=a.num_unmapped_unpaired;
+		num_aligned_unpaired_multi+=a.num_aligned_unpaired_multi;
+		num_aligned_unpaired_xmulti+=a.num_aligned_unpaired_xmulti;
 	}
 
 };
@@ -367,7 +381,7 @@ char pair_best_alignments(const HitsForRead& left_hits,
 	if (ret_code)
 		return ret_code;
 	*/
-	unsigned int l_nhits=0, r_nhits=0;
+	unsigned int nhits=0;
 
 	vector<BowtieHit> rhits;
 
@@ -377,34 +391,25 @@ char pair_best_alignments(const HitsForRead& left_hits,
 				right[j].gap_length() > read_gap_length ||
 				right[j].edit_dist() > read_edit_dist)
 			continue;
-			r_nhits++;
-			ret_code|=2; //right read has acceptable mappings
-			if (r_nhits>1) ret_code |= 8; //right read has multiple valid mappings
-			if (r_nhits>max_multihits) {
-				ret_code |= 32; //left read has too many mappings
-				//if (r_nhits > (max_multihits<<2))
-					break;
-			}
-			BowtieHit rh = right[j];
-			AlignStatus align_status(rh, gtf_junctions,
-					junctions, insertions, deletions, fusions, coverage);
-			rh.alignment_score(align_status._alignment_score);
-			rhits.push_back(rh);
+		nhits++;
+		if ((nhits>>1) > max_multihits)
+			break;
+		BowtieHit rh = right[j];
+		AlignStatus align_status(rh, gtf_junctions,
+				junctions, insertions, deletions, fusions, coverage);
+		rh.alignment_score(align_status._alignment_score);
+		rhits.push_back(rh);
 	}
-
+    nhits=0;
 	for (size_t i = 0; i < left.size(); ++i)
 	{
 		if (left[i].mismatches() > read_mismatches ||
 				left[i].gap_length() > read_gap_length ||
 				left[i].edit_dist() > read_edit_dist)
 			continue;
-		l_nhits++;
-		ret_code|=1; //left read has acceptable mappings
-		if (l_nhits>1) ret_code |= 4; //left read has multiple valid mappings
-		if (l_nhits>max_multihits) {
-			ret_code |= 16; //left read has too many mappings
-			//if (l_nhits > (max_multihits<<2))
-				break;
+		nhits++;
+		if ((nhits>>1)>max_multihits) {
+		    break;
 		}
 		BowtieHit lh = left[i];
 		AlignStatus align_status(lh, gtf_junctions,
@@ -516,8 +521,15 @@ if (lh.insert_id() == 10790262)
 
 				if (final_report)
 				{
-					if (suppress_hits && best_hits.size() > max_multihits)
-						best_hits.clear();
+					nhits = best_hits.size();
+					if (nhits>0) {
+					  ret_code|=3; //both reads have acceptable mappings
+					}
+					if (nhits>1) ret_code |= 12; //both reads have multiple valid mappings
+					if (nhits>max_multihits) {
+						ret_code |= 48; //both reads have too many mappings
+						if (suppress_hits) best_hits.clear();
+					}
 
 					if (best_hits.size() > max_multihits)
 					{
@@ -597,37 +609,37 @@ void add_auxData(vector<string>& auxdata, vector<string>& sam_toks,
     // requirement, and that the strand indicated by the alignment is consistent
     // with the orientation of the splices (though that should be handled upstream).
   if (!XS_found) {
-    const string xs_f("XS:A:+");
-    const string xs_r("XS:A:-");
-    if (library_type == FR_FIRSTSTRAND) {
-      if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED) {
-	if (bh.antisense_align())
-	  auxdata.push_back(xs_f);
-	else
-	  auxdata.push_back(xs_r);
-      }
-      else {
-	if (bh.antisense_align())
-	  auxdata.push_back(xs_r);
-	else
-	  auxdata.push_back(xs_f);
-      }
-    }
-    else if (library_type == FR_SECONDSTRAND)   {
-      if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED){
-	if (bh.antisense_align())
-	  auxdata.push_back(xs_r);
-	else
-	  auxdata.push_back(xs_f);
-      }
-      else
-	{
-	  if (bh.antisense_align())
-	    auxdata.push_back(xs_f);
-	  else
-	    auxdata.push_back(xs_r);
-	}
-    }
+	  const string xs_f("XS:A:+");
+	  const string xs_r("XS:A:-");
+	  if (library_type == FR_FIRSTSTRAND) {
+		  if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED) {
+			  if (bh.antisense_align())
+				  auxdata.push_back(xs_f);
+			  else
+				  auxdata.push_back(xs_r);
+		  }
+		  else {
+			  if (bh.antisense_align())
+				  auxdata.push_back(xs_r);
+			  else
+				  auxdata.push_back(xs_f);
+		  }
+	  }
+	  else if (library_type == FR_SECONDSTRAND)   {
+		  if (insert_side == FRAG_LEFT || insert_side == FRAG_UNPAIRED){
+			  if (bh.antisense_align())
+				  auxdata.push_back(xs_r);
+			  else
+				  auxdata.push_back(xs_f);
+		  }
+		  else
+		  {
+			  if (bh.antisense_align())
+				  auxdata.push_back(xs_f);
+			  else
+				  auxdata.push_back(xs_r);
+		  }
+	  }
   }
   if (hitIndex >= 0)
     {
@@ -2106,16 +2118,17 @@ void print_alnStats(SAlignStats& alnStats) {
 	FILE* f = fopen(fname.c_str(), "w");
 	int64_t total_left=alnStats.num_aligned_left+alnStats.num_unmapped_left;
 	int64_t total_right=alnStats.num_aligned_right+alnStats.num_unmapped_right;
+	int64_t total_unpaired=alnStats.num_aligned_unpaired+alnStats.num_unmapped_unpaired;
 	//int64_t accepted_left =alnStats.num_aligned_left -alnStats.num_aligned_left_xmulti; //accepted mappings, < max_multihits
 	//int64_t accepted_right=alnStats.num_aligned_right-alnStats.num_aligned_right_xmulti; //accepted right mappings
 	string rdn("Left reads");
 	if (total_right==0) rdn="Reads";
 	fprintf(f, "%s:\n", rdn.c_str());
-	fprintf(f, "               Input: %9ld\n", total_left);
+	fprintf(f, "          Input     : %9ld\n", total_left);
 	double perc=(100.0*alnStats.num_aligned_left)/total_left;
-	fprintf(f, "              Mapped: %9ld (%4.1f%% of input)\n",
+	fprintf(f, "           Mapped   : %9ld (%4.1f%% of input)\n",
 			alnStats.num_aligned_left, perc);
-	if (alnStats.num_aligned_left) {
+	if (alnStats.num_aligned_left && alnStats.num_aligned_left_multi>0) {
 		perc=(100.0*alnStats.num_aligned_left_multi)/alnStats.num_aligned_left;
 	 fprintf(f,"            of these: %9ld (%4.1f%%) have multiple alignments (%ld have >%d)\n",
 			alnStats.num_aligned_left_multi, perc, alnStats.num_aligned_left_xmulti, max_multihits);
@@ -2129,33 +2142,48 @@ void print_alnStats(SAlignStats& alnStats) {
 	int64_t total_pairs=0;
 	if (total_right) {
 		fprintf(f, "Right reads:\n");
-		fprintf(f, "               Input: %9ld\n", total_right);
+		fprintf(f, "          Input     : %9ld\n", total_right);
 		total_input+=total_right;
 		perc=(100.0*alnStats.num_aligned_right)/total_right;
-		fprintf(f, "              Mapped: %9ld (%4.1f%% of input)\n",
+		fprintf(f, "           Mapped   : %9ld (%4.1f%% of input)\n",
 				alnStats.num_aligned_right, perc);
-		if (alnStats.num_aligned_right) {
+		if (alnStats.num_aligned_right && alnStats.num_aligned_right_multi>0) {
 			perc=(100.0* alnStats.num_aligned_right_multi)/alnStats.num_aligned_right;
 			fprintf(f,"            of these: %9ld (%4.1f%%) have multiple alignments (%ld have >%d)\n",
 					alnStats.num_aligned_right_multi, perc, alnStats.num_aligned_right_xmulti, max_multihits);
 		}
-/*	perc=(100.0*accepted_right)/total_left;
-		fprintf(f, "   Mapped acceptably: %9ld (%4.1f%% of input)\n",
-				accepted_right, perc);
-*/
 		total_mapped+=alnStats.num_aligned_right;
 		total_pairs=(total_right<total_left)? total_right : total_left;
+
+		if (total_unpaired) {
+				fprintf(f, "Unpaired reads:\n");
+				fprintf(f, "          Input     : %9ld\n", total_unpaired);
+				total_input+=total_unpaired;
+				perc=(100.0*alnStats.num_aligned_unpaired)/total_unpaired;
+				fprintf(f, "           Mapped   : %9ld (%4.1f%% of input)\n",
+						alnStats.num_aligned_unpaired, perc);
+				if (alnStats.num_aligned_unpaired && alnStats.num_aligned_unpaired_multi>0) {
+					perc=(100.0* alnStats.num_aligned_unpaired_multi)/alnStats.num_aligned_unpaired;
+					fprintf(f,"            of these: %9ld (%4.1f%%) have multiple alignments (%ld have >%d)\n",
+							alnStats.num_aligned_unpaired_multi, perc, alnStats.num_aligned_unpaired_xmulti, max_multihits);
+				}
+		total_mapped+=alnStats.num_aligned_unpaired;
+		}
 	}
 	perc=(100.0*total_mapped)/total_input;
-	fprintf(f, "%4.1f%% overall read alignment rate.\n", perc);
+	fprintf(f, "%4.1f%% overall read mapping rate.\n", perc);
 	if (alnStats.num_aligned_pairs) {
 		fprintf(f, "\nAligned pairs: %9ld\n", alnStats.num_aligned_pairs);
-		perc=(100.0*alnStats.num_aligned_pairs_multi)/alnStats.num_aligned_pairs;
-		fprintf(f, "     of these: %9ld (%4.1f%%) have multiple alignments\n",
+		if (alnStats.num_aligned_pairs_multi>0) {
+		  perc=(100.0*alnStats.num_aligned_pairs_multi)/alnStats.num_aligned_pairs;
+		  fprintf(f, "     of these: %9ld (%4.1f%%) have multiple alignments\n",
 				alnStats.num_aligned_pairs_multi, perc);
-		perc=(100.0*alnStats.num_aligned_pairs_disc)/alnStats.num_aligned_pairs;
-		fprintf(f, "          and: %9ld (%4.1f%%) are discordant alignments\n",
+		}
+		if (alnStats.num_aligned_pairs_disc>0) {
+		  perc=(100.0*alnStats.num_aligned_pairs_disc)/alnStats.num_aligned_pairs;
+		  fprintf(f, "               %9ld (%4.1f%%) are discordant alignments\n",
 				alnStats.num_aligned_pairs_disc, perc);
+		}
 		perc=(100.0*(alnStats.num_aligned_pairs-alnStats.num_aligned_pairs_disc))/total_pairs;
 		fprintf(f, "%4.1f%% concordant pair alignment rate.\n", perc);
 	}
@@ -2163,43 +2191,24 @@ void print_alnStats(SAlignStats& alnStats) {
 }
 
 struct CReadProc: public GetReadProc {
-	CReadProc(GBamWriter* bamw=NULL, int64_t* um_counter=NULL, int64_t* mm_counter=NULL):
-		GetReadProc(bamw, um_counter, mm_counter) {}
-	virtual bool process(QReadData& rdata, bool& found, bool is_unmapped) {
-		//if (um_out && ((um_code && found) || !found )) {
-		string rname(rdata.read.alt_name);
-		//-- DEBUG
-		//fprintf(stderr, ">WUM\t%c\t%s\t%c\t/%d\n", found?'F':'U',
-		//		rname.c_str(), (um_code && found) ? um_code: '-', rdata.matenum);
-		//-- DEBUG.
-		size_t slash_pos=rname.rfind('/');
-		if (slash_pos!=string::npos)
-			rname.resize(slash_pos);
-		GBamRecord bamrec(rname.c_str(), -1, 0, false, rdata.read.seq.c_str(),
-				NULL, rdata.read.qual.c_str());
-		if (rdata.matenum) {
-			bamrec.set_flag(BAM_FPAIRED);
-			if (rdata.matenum==1) bamrec.set_flag(BAM_FREAD1);
-			else bamrec.set_flag(BAM_FREAD2);
-		}
-		//if (found && um_code && !rdata.trashCode) {
-		//rdata.trashCode=um_code;
-		//}
-		if (rdata.trashCode) {
-			//multi-mapped reads did not really QC-fail
-			//should also not be written to unmapped.bam
-			bamrec.add_aux("ZT", 'A', 1, (uint8_t*)&rdata.trashCode);
-			if (rdata.trashCode!='M') {
-				bamrec.set_flag(BAM_FQCFAIL); //to be excluded from further processing?
-			}
-		}
-		if (is_unmapped || (rdata.trashCode!='M' && !found)) {
-			um_out->write(&bamrec);
+	CReadProc(GBamWriter* bamw=NULL, int64_t* um_counter=NULL, int64_t* mm_counter=NULL,
+			int64_t* u_um_counter=NULL, int64_t* u_mm_counter=NULL):
+		GetReadProc(bamw, um_counter, mm_counter, u_um_counter, u_mm_counter) {}
+	virtual bool process(QReadData& rdata, bool& found) { //, bool is_unmapped) {
+		//if (is_unmapped || (rdata.trashCode!='M' && !found)) {
+		if (rdata.trashCode!='M' && !found) {
+			this->writeUnmapped(rdata);
 		}
 		//
 		if (unmapped_counter && !found) {
-			if (rdata.trashCode!='M') (*unmapped_counter)++;
-			else if (multimapped_counter) (*multimapped_counter)++;
+			if (rdata.trashCode!='M') {
+				if (rdata.matenum) (*unmapped_counter)++;
+				else if (u_unmapped_counter) (*u_unmapped_counter)++;
+			}
+			else if (multimapped_counter) {
+				if (rdata.matenum) (*multimapped_counter)++;
+				else if (u_multimapped_counter) (*u_multimapped_counter)++;
+			}
 		}
 		return true;
 	}
@@ -2211,27 +2220,48 @@ struct ReportWorker {
 			insertions(NULL), deletions(NULL), rev_deletions(NULL), fusions(NULL), coverage(NULL),
 			final_junctions(NULL), final_insertions(NULL), final_deletions(NULL), final_fusions(NULL),
 			rt(r), begin_id(0), end_id(0),  left_reads_offset(0), left_map_offset(0), right_reads_offset(0),
-			right_map_offset(0), alnStats(s), is_paired(false) {  }
+			right_map_offset(0), alnStats(s), PE_reads(false) {  }
 
 	void write_singleton_alignments(uint64_t curr_obs_order,
 	    HitsForRead& curr_hit_group, ReadStream& reads_file,
 	    GBamWriter& bam_writer, FragmentType fragment_type,
 	    GetReadProc* readProc=NULL,
-	    Read* gotRead=NULL) {
+	    QReadData* gotRead=NULL) {
 		int64_t* unmapped_counter = & alnStats->num_unmapped_left;
 		int64_t* aligned_counter = & alnStats->num_aligned_left;
 		int64_t* aligned_counter_multi = & alnStats->num_aligned_left_multi;
 		int64_t* aligned_counter_xmulti = & alnStats->num_aligned_left_xmulti;
-		if (fragment_type == FRAG_RIGHT) {
+
+		QReadData read;
+		if (gotRead==NULL) {
+			if (reads_file.getQRead(curr_obs_order, read,
+			                   begin_id, end_id, readProc))
+                  gotRead=&read;
+		}
+		if (gotRead==NULL) {
+			//Should never happen!
+			warn_msg("Error: singleton getQRead() failed for id# %ld.\n", curr_obs_order);
+			return;
+		}
+
+		if (gotRead->matenum==0) {
+			 fragment_type = FRAG_UNPAIRED;
+			 if (PE_reads) {
+				 unmapped_counter = & alnStats->num_unmapped_unpaired;
+				 aligned_counter = & alnStats->num_aligned_unpaired;
+				 aligned_counter_multi = & alnStats->num_aligned_unpaired_multi;
+				 aligned_counter_xmulti = & alnStats->num_aligned_unpaired_xmulti;
+			 }
+		} else if (fragment_type == FRAG_RIGHT) {
 			unmapped_counter = & alnStats->num_unmapped_right;
 			aligned_counter = & alnStats->num_aligned_right;
 			aligned_counter_multi = & alnStats->num_aligned_right_multi;
 			aligned_counter_xmulti = & alnStats->num_aligned_right_xmulti;
 		}
-		if (is_paired && !report_mixed_alignments) {
-			// FIXME: how did we get here?
-			// can this be a problem if input reads are mixed: paired + single ?
-			if (!gotRead) {
+
+		if (PE_reads && !report_mixed_alignments) {
+			//the user only wants paired alignments reported
+			/* if (!gotRead) {
 				if (curr_hit_group.hits.size()>1) {
 					(*aligned_counter_multi)++;
 				}
@@ -2239,60 +2269,52 @@ struct ReportWorker {
 					(*aligned_counter)++;
 				}
 			}
+			*/
+			if (readProc) readProc->writeUnmapped(*gotRead);
+			(*unmapped_counter)++;
 			return;
 		}
 		HitsForRead best_hits;
 		best_hits.insert_id = curr_obs_order;
 		realign_reads(curr_hit_group, *rt, *junctions, *rev_junctions, *insertions,
-		    *deletions, *rev_deletions, *fusions);
+				*deletions, *rev_deletions, *fusions);
 
 		exclude_hits_on_filtered_junctions(*junctions, curr_hit_group);
 
 		// Process hits for singleton, select best alignments
 		const bool final_report = true;
 		char map_flags=read_best_alignments(curr_hit_group, best_hits, *gtf_junctions, *junctions,
-		    *insertions, *deletions, *fusions, *coverage, final_report, &rng);
+				*insertions, *deletions, *fusions, *coverage, final_report, &rng);
 
-		string read_alt_name;
-		bool got_read = false;
-		if (gotRead!=NULL) {
-			read_alt_name=gotRead->alt_name;
-			got_read = true;
+		char map_code=0;
+		if (map_flags & 4) {
+			(*aligned_counter_multi)++;
 		}
+		if (map_flags & 16) {
+			map_code='M';
+			(*aligned_counter_xmulti)++;
+		}
+		if (map_flags & 1) (*aligned_counter)++; //acceptable mappings found
 		else {
-			Read read;
-			char map_code=0;
-			if (map_flags & 4) {
-				(*aligned_counter_multi)++;
-			}
-			if (map_flags & 16) {
-				map_code='M';
-				(*aligned_counter_xmulti)++;
-			}
-			if (map_flags & 1) (*aligned_counter)++; //acceptable mappings found
-			else (*unmapped_counter)++;
-			//CReadProc readProc(&um_out, unmapped_counter, aligned_counter_xmulti);
-			got_read=reads_file.getRead(curr_obs_order, read, reads_format,
-			    false, begin_id, end_id, readProc, (map_flags & 1)==0 );
-			    //&um_out, map_code, unmapped_counter, aligned_counter_xmulti);
-			read_alt_name=read.alt_name;
+			if (!gotRead->um_written)
+				(*unmapped_counter)++;
+			if (readProc)
+				readProc->writeUnmapped(*gotRead);
 		}
+
 		if (best_hits.hits.size() > 0) {
-			if (got_read) {
 				update_junctions(best_hits, *final_junctions);
 				update_insertions_and_deletions(best_hits, *final_insertions,
-				    *final_deletions);
+						*final_deletions);
 				update_fusions(best_hits, *rt, *final_fusions, *fusions);
-
-				print_sam_for_single(*rt, best_hits, fragment_type, read_alt_name,
-				    bam_writer, rng);
-			}
-			else {
-				//Should never happen!
-				fprintf(stderr, "Warning: getRead() failed for id# %ld.\n", curr_obs_order);
-			}
+				if (!gotRead->um_written) {
+					print_sam_for_single(*rt, best_hits, fragment_type, gotRead->read.alt_name,
+							bam_writer, rng);
+				}
+		}
+		else {
+			readProc->writeUnmapped(*gotRead);
 		}
-		//else reads_file.
 	}
 
 	void operator()() {
@@ -2301,7 +2323,7 @@ struct ReportWorker {
 		ReadTable it;
 		GBamWriter bam_writer(bam_output_fname.c_str(), sam_header.c_str());
 
-		is_paired = right_map_fnames.size() > 0;
+		PE_reads = right_map_fnames.size() > 0;
 
 		ReadStream left_reads_file(left_reads_fname);
 		if (left_reads_file.file() == NULL)
@@ -2347,12 +2369,17 @@ struct ReportWorker {
 		const bool final_report = true;
 
 		// While we still have unreported hits...
-		Read l_read;
-		Read r_read;
+		QReadData l_read;
+		QReadData r_read;
+		int64_t* num_unmapped_unpaired = (PE_reads) ? &(alnStats->num_unmapped_unpaired) :
+				&(alnStats->num_unmapped_left);
+		int64_t* num_aligned_unpaired_xmulti= (PE_reads) ?  &(alnStats->num_aligned_unpaired_xmulti) :
+				&(alnStats->num_aligned_left_xmulti);
 		CReadProc l_readProc(left_um_out, &(alnStats->num_unmapped_left),
-		    &(alnStats->num_aligned_left_xmulti));
+		    &(alnStats->num_aligned_left_xmulti), num_unmapped_unpaired, num_aligned_unpaired_xmulti);
 		CReadProc r_readProc(right_um_out, &(alnStats->num_unmapped_right),
-		    &(alnStats->num_aligned_right_xmulti));
+		    &(alnStats->num_aligned_right_xmulti), &(alnStats->num_unmapped_unpaired),
+		    &(alnStats->num_aligned_unpaired_xmulti));
 
 		while ((curr_left_obs_order != VMAXINT32
 		    || curr_right_obs_order != VMAXINT32)
@@ -2366,7 +2393,7 @@ struct ReportWorker {
 			    && curr_left_obs_order < end_id && curr_left_obs_order != VMAXINT32) {
 				write_singleton_alignments(curr_left_obs_order, curr_left_hit_group,
 				    left_reads_file, bam_writer, //*left_um_out,
-				    is_paired ? FRAG_LEFT : FRAG_UNPAIRED, &l_readProc);
+				    PE_reads ? FRAG_LEFT : FRAG_UNPAIRED, &l_readProc);
 
 				// Get next hit group
 				left_hs.next_read_hits(curr_left_hit_group);
@@ -2387,7 +2414,7 @@ struct ReportWorker {
 			}
 
 			// Since we have both left hits and right hits for this insert,
-			// Find the best pairing and print both
+			//  find the best pairing and print both alignments
 			while (curr_left_obs_order == curr_right_obs_order
 			    && curr_left_obs_order < end_id && curr_left_obs_order != VMAXINT32) {
 				realign_reads(curr_left_hit_group, *rt, *junctions, *rev_junctions,
@@ -2403,44 +2430,54 @@ struct ReportWorker {
 				bool paired_alignments = curr_left_hit_group.hits.size() > 0
 				    && curr_right_hit_group.hits.size() > 0;
 				InsertAlignmentGrade grade;
-				bool got_left_read = false;
-				bool got_right_read = false;
-				if (paired_alignments) {
-					char pair_map_flags=pair_best_alignments(curr_left_hit_group, curr_right_hit_group, grade,
+				bool got_left_read = left_reads_file.getQRead(curr_left_obs_order,
+						    	l_read, begin_id, end_id, &l_readProc);
+				if (!got_left_read) {
+					warn_msg("Error: failed to retrieve left read for pair # %ld !\n", curr_left_obs_order);
+				}
+				bool got_right_read = right_reads_file.getQRead(curr_right_obs_order, r_read,
+					    begin_id, end_id, &r_readProc);
+				if (!got_right_read) {
+					warn_msg("Error: failed to retrieve right read for pair # %ld !\n", curr_right_obs_order);
+				}
+				char pair_map_flags=0;
+				if (paired_alignments) { //there are *some* paired alignments
+					pair_map_flags=pair_best_alignments(curr_left_hit_group, curr_right_hit_group, grade,
 					    best_hits, *gtf_junctions, *junctions, *insertions, *deletions,
 					    *fusions, *coverage, final_report, &rng);
+					/*
 					if (pair_map_flags & 1) alnStats->num_aligned_left++;
 					else alnStats->num_unmapped_left++;
 					if (pair_map_flags & 2) alnStats->num_aligned_right++;
 					else alnStats->num_unmapped_right++;
 					if ((pair_map_flags & 3)==3) //at least one acceptable alignment was found for each read
 						alnStats->num_aligned_pairs++;
-					if (pair_map_flags & 4) alnStats->num_aligned_left_multi++;
-					if (pair_map_flags & 8) alnStats->num_aligned_right_multi++;
-					if ((pair_map_flags & 12) == 12) alnStats->num_aligned_pairs_multi++;
 					char left_map_code=0;
+					*/
+					/*
+					//if (pair_map_flags & 4) alnStats->num_aligned_left_multi++;
+					//if (pair_map_flags & 8) alnStats->num_aligned_right_multi++;
+					if ((pair_map_flags & 12) == 12) alnStats->num_aligned_pairs_multi++;
+
 					if (pair_map_flags & 16) {
-						left_map_code='M';
+						//left_map_code='M';
 						alnStats->num_aligned_left_xmulti++;
 					}
 					char right_map_code=0;
 					if (pair_map_flags & 32) {
-						right_map_code='M';
+						//right_map_code='M';
 						alnStats->num_aligned_right_xmulti++;
 					}
-					got_left_read = left_reads_file.getRead(
-							curr_left_obs_order, l_read, reads_format, false,
-					    begin_id, end_id, &l_readProc, (pair_map_flags & 1)==0);
-					    //left_um_out, left_map_code, &(alnStats->num_unmapped_left),
-					    //&(alnStats->num_aligned_left_xmulti));
-
-					got_right_read = right_reads_file.getRead(
-							curr_right_obs_order, r_read, reads_format, false,
-					    begin_id, end_id, &r_readProc, (pair_map_flags & 2)==0);
-					    //right_um_out, right_map_code, &(alnStats->num_unmapped_right),
-					    //&(alnStats->num_aligned_right_xmulti));
-					//FIXME: what's the best way to check here if the pair alignment is discordant?
-					//if (((pair_map_flags & 3)==3) && (best_hits.size() <= 0 || grade.fusion)) {
+
+					//l_read should be written as unmapped if (pair_map_flags & 1)==0
+					if (got_left_read && (pair_map_flags & 1)==0) {
+						l_readProc.writeUnmapped(l_read);
+					}
+					// r_read should be written as unmapped if (pair_map_flags & 2)==0
+					if (got_right_read && (pair_map_flags & 2)==0) {
+						r_readProc.writeUnmapped(r_read);
+					}
+					*/
 					if (((pair_map_flags & 3)==3) && !grade.concordant()) {
 						alnStats->num_aligned_pairs_disc++;
 					}
@@ -2451,7 +2488,8 @@ struct ReportWorker {
 							paired_alignments = false;
 					}
 				}
-				if (paired_alignments) {
+				if (paired_alignments) { //after some filtering, these paired reads still
+					                     // have some alignments together
 					HitsForRead best_left_hit_group;
 					best_left_hit_group.insert_id = curr_left_obs_order;
 					HitsForRead best_right_hit_group;
@@ -2463,16 +2501,7 @@ struct ReportWorker {
 					}
 
 					if (best_hits.size() > 0) {
-/*
-						bool got_left_read = left_reads_file.getRead(
-						    best_hits[0].first.insert_id(), l_read, reads_format, false,
-						    begin_id, end_id, left_um_out, 0, &(alnStats->num_unmapped_left));
-
-						bool got_right_read = right_reads_file.getRead(
-						    best_hits[0].first.insert_id(), r_read, reads_format, false,
-						    begin_id, end_id, right_um_out, 0, &(alnStats->num_unmapped_right));
-*/
-						if (got_left_read && got_right_read) {
+						if (got_left_read && got_right_read) { //should always be true
 							update_junctions(best_left_hit_group, *final_junctions);
 							update_insertions_and_deletions(best_left_hit_group,
 							    *final_insertions, *final_deletions);
@@ -2488,25 +2517,66 @@ struct ReportWorker {
 							pair_support(best_hits, *final_fusions, *fusions);
 
 							print_sam_for_pair(*rt, best_hits, grade, bam_writer,
-							    l_read.alt_name, r_read.alt_name, rng, begin_id, end_id);
-						}
+							    l_read.read.alt_name, r_read.read.alt_name, rng, begin_id, end_id);
+							l_read.um_written=true;
+							r_read.um_written=true;
+						} /*
 						else {
-							fprintf(stderr, "Warning: couldn't get reads for pair #%ld (%d, %d)\n",
+							fprintf(stderr, "Error: couldn't get reads for pair #%ld (%d, %d)\n",
 									curr_left_obs_order, int(got_left_read), int(got_right_read));
-						}
+						}*/
+					}
+
+					if (best_hits.size()>0) {
+						alnStats->num_aligned_left++;
+						alnStats->num_aligned_right++;
+						alnStats->num_aligned_pairs++;
+					}
+					else {
+						alnStats->num_unmapped_left++;
+						alnStats->num_unmapped_right++;
+					}
+					if (pair_map_flags & 4) alnStats->num_aligned_left_multi++;
+					if (pair_map_flags & 8) alnStats->num_aligned_right_multi++;
+					if ((pair_map_flags & 12) == 12) alnStats->num_aligned_pairs_multi++;
+
+					if (pair_map_flags & 16) {
+						//left_map_code='M';
+						alnStats->num_aligned_left_xmulti++;
+					}
+					if (pair_map_flags & 32) {
+						//right_map_code='M';
+						alnStats->num_aligned_right_xmulti++;
+					}
+
+					//l_read should be written as unmapped if (pair_map_flags & 1)==0
+					if (got_left_read && (pair_map_flags & 1)==0) {
+						l_readProc.writeUnmapped(l_read);
+					}
+					// r_read should be written as unmapped if (pair_map_flags & 2)==0
+					if (got_right_read && (pair_map_flags & 2)==0) {
+						r_readProc.writeUnmapped(r_read);
 					}
 				}
-				else { //alignments not paired properly
+				else { //paired reads with alignments not paired properly
 					if (curr_left_hit_group.hits.size() > 0) {
 						write_singleton_alignments(curr_left_obs_order, curr_left_hit_group,
 						    left_reads_file, bam_writer, //*left_um_out,
-						    is_paired ? FRAG_LEFT : FRAG_UNPAIRED, &l_readProc, &l_read);
+									   PE_reads ? FRAG_LEFT : FRAG_UNPAIRED, &l_readProc,
+											   got_left_read ? &l_read : NULL);
+					}
+					else {
+						l_readProc.writeUnmapped(l_read);
+						alnStats->num_unmapped_left++;
 					}
 
 					if (curr_right_hit_group.hits.size() > 0) {   //only right read mapped
 						write_singleton_alignments(curr_right_obs_order,
 						    curr_right_hit_group, right_reads_file, bam_writer, //*right_um_out,
-						    FRAG_RIGHT, &r_readProc, &r_read);
+									   FRAG_RIGHT, &r_readProc, got_right_read ? &r_read : NULL);
+					} else {
+						r_readProc.writeUnmapped(r_read);
+						alnStats->num_unmapped_right++;
 					}
 				}
 
@@ -2522,11 +2592,11 @@ struct ReportWorker {
 
 		//print the remaining unmapped reads at the end of each reads' stream
 
-		left_reads_file.getRead(VMAXINT32, l_read, reads_format, false, begin_id,
+		left_reads_file.getQRead(VMAXINT32, l_read, begin_id,
 		    end_id, &l_readProc);
 		    //left_um_out, 0, &(alnStats->num_unmapped_left), &(alnStats->num_aligned_left_xmulti));
 		if (right_reads_file.file())
-			right_reads_file.getRead(VMAXINT32, r_read, reads_format, false, begin_id,
+			right_reads_file.getQRead(VMAXINT32, r_read, begin_id,
 			    end_id, &r_readProc);
 			    //right_um_out, 0, &(alnStats->num_unmapped_right), &(alnStats->num_aligned_right_xmulti));
 
@@ -2573,7 +2643,7 @@ struct ReportWorker {
 	//read alignment accounting:
 	SAlignStats* alnStats;
 
-	bool is_paired;
+	bool PE_reads;
 
 	boost::mt19937 rng;
 };
@@ -2945,7 +3015,7 @@ int main(int argc, char** argv)
   fprintf(stderr, "tophat_reports v%s (%s)\n", PACKAGE_VERSION, SVN_REVISION);
   fprintf(stderr, "---------------------------------------\n");
   
-  reads_format = FASTQ;
+  //reads_format = FASTQ;
   
   int parse_ret = parse_options(argc, argv, print_usage);
   if (parse_ret)

-- 
Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/tophat.git



More information about the debian-med-commit mailing list