[med-svn] [bitseq] 11/14: New upstream version 0.7.5+dfsg

Andreas Tille tille at debian.org
Sat Dec 2 08:47:48 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository bitseq.

commit ebcd5e842eff36228eb7742306a08fa2b01c6cfa
Author: Andreas Tille <tille at debian.org>
Date:   Sat Dec 2 09:45:57 2017 +0100

    New upstream version 0.7.5+dfsg
---
 ArgumentParser.cpp                                |  308 ++++++
 ArgumentParser.h                                  |  110 ++
 CollapsedSampler.cpp                              |   95 ++
 CollapsedSampler.h                                |   16 +
 FileHeader.cpp                                    |  117 +++
 FileHeader.h                                      |   45 +
 GibbsParameters.cpp                               |  110 ++
 GibbsParameters.h                                 |   45 +
 GibbsSampler.cpp                                  |   97 ++
 GibbsSampler.h                                    |   17 +
 Makefile                                          |  129 +++
 MyTimer.cpp                                       |   60 ++
 MyTimer.h                                         |   28 +
 PosteriorSamples.cpp                              |  299 ++++++
 PosteriorSamples.h                                |   66 ++
 README.md                                         |   34 +
 ReadDistribution.cpp                              | 1126 +++++++++++++++++++++
 ReadDistribution.h                                |  134 +++
 Sampler.cpp                                       |  220 ++++
 Sampler.h                                         |   99 ++
 SimpleSparse.cpp                                  |   97 ++
 SimpleSparse.h                                    |   27 +
 TagAlignments.cpp                                 |  132 +++
 TagAlignments.h                                   |   44 +
 TranscriptExpression.cpp                          |   87 ++
 TranscriptExpression.h                            |   38 +
 TranscriptInfo.cpp                                |  258 +++++
 TranscriptInfo.h                                  |   75 ++
 TranscriptSequence.cpp                            |  197 ++++
 TranscriptSequence.h                              |   82 ++
 VariationalBayes.cpp                              |  384 +++++++
 VariationalBayes.h                                |   45 +
 _release_Makefile                                 |  122 +++
 asa103/LICENSE.txt                                |  165 +++
 asa103/asa103.hpp                                 |   96 ++
 biocUpdate.sh                                     |   24 +
 changeList                                        |   86 ++
 checkTR.py                                        |   76 ++
 common.cpp                                        |   22 +
 common.h                                          |   41 +
 convertSamples.cpp                                |  197 ++++
 debian/changelog                                  |   14 -
 debian/compat                                     |    1 -
 debian/control                                    |   30 -
 debian/copyright                                  |   22 -
 debian/patches/hardening.patch                    |   17 -
 debian/patches/link_against_system_samtools.patch |   51 -
 debian/patches/series                             |    2 -
 debian/rules                                      |   63 --
 debian/source/format                              |    1 -
 debian/watch                                      |    4 -
 estimateDE.cpp                                    |  326 ++++++
 estimateExpression.cpp                            |  597 +++++++++++
 estimateHyperPar.cpp                              |  369 +++++++
 estimateVBExpression.cpp                          |  238 +++++
 extractSamples.cpp                                |  126 +++
 extractTranscriptInfo.py                          |   91 ++
 getCounts.py                                      |   78 ++
 getFoldChange.cpp                                 |  112 ++
 getGeneExpression.cpp                             |  120 +++
 getPPLR.cpp                                       |  152 +++
 getVariance.cpp                                   |  167 +++
 getWithinGeneExpression.cpp                       |  248 +++++
 lowess.cpp                                        |  511 ++++++++++
 lowess.h                                          |   30 +
 misc.cpp                                          |  240 +++++
 misc.h                                            |   84 ++
 parameters1.txt                                   |   32 +
 parseAlignment.cpp                                |  612 +++++++++++
 parseAlignment.py                                 |  482 +++++++++
 releaseDo.sh                                      |   52 +
 releaseList                                       |   55 +
 tagAlignment.h                                    |   37 +
 transposeFiles.cpp                                |  146 +++
 transposeFiles.h                                  |    4 +
 transposeLargeFile.cpp                            |   22 +
 76 files changed, 10381 insertions(+), 205 deletions(-)

diff --git a/ArgumentParser.cpp b/ArgumentParser.cpp
new file mode 100644
index 0000000..63b94d7
--- /dev/null
+++ b/ArgumentParser.cpp
@@ -0,0 +1,308 @@
+#include<algorithm>
+#include<cstdlib>
+#include<sstream>
+
+#include"ArgumentParser.h"
+
+#include "misc.h"
+
+#include "common.h"
+
+#define FF first
+#define SS second
+#define Sof(x) (long)x.size()
+
/* Split 'input' on the separator 'space' (default ",") and parse each
 * non-empty token as a double (atof semantics: non-numeric tokens -> 0.0).
 * Empty tokens produced by consecutive separators are skipped. */
vector <double> tokenizeD(const string &input,const string &space = ","){//{{{
   vector <double> ret;
   const long sepLen = (long)space.size();
   long pos=0,f=0,n=input.size();
   while((pos<n)&&(f<n)&&(f>=0)){
      f=input.find(space,pos);
      // Separator right at 'pos' -> empty token; skip the whole separator.
      if(f==pos)pos+=sepLen;
      else{
         if((f <n)&&(f>=0)){
            ret.push_back(atof(input.substr(pos,f-pos).c_str()));
            // Bug fix: advance past the full separator (was 'f+1', which
            // broke multi-character separators).
            pos=f+sepLen;
         }
      }
   }
   // Trailing token after the last separator.
   if(pos<n)ret.push_back(atof(input.substr(pos,n-pos).c_str()));
   return ret;
} //}}}
+
+
+// GET {{{
+string ArgumentParser::getS(const string &name) const{
+   if(!existsOption(name,true))return "";
+   if(mapS.find(name)!=mapS.end())
+      return mapS.find(name)->second;
+   return "";
+}
+string ArgumentParser::getLowerS(const string &name) const{
+   if(!existsOption(name,true))return "";
+   if(mapS.find(name)!=mapS.end())
+      return ns_misc::toLower(mapS.find(name)->second);
+   return "";
+}
+long ArgumentParser::getL(const string &name) const{
+   if(!existsOption(name,true))return -1;
+   if(mapL.find(name)!=mapL.end())
+      return mapL.find(name)->second;
+   return -1;
+}
+double ArgumentParser::getD(const string &name) const{
+   if(!existsOption(name,true))return -1;
+   if(mapD.find(name)!=mapD.end())
+      return mapD.find(name)->second;
+   return -1;
+}
// Value of boolean option (flag) <name>; simply delegates to isSet(),
// which for OTBOOL options returns the stored flag value.
bool ArgumentParser::flag(const string &name) const {
   return isSet(name);
}
+vector<double> ArgumentParser::getTokenizedS2D(const string &name) const{
+   if(!existsOption(name,true))return vector<double>();
+   if(mapS.find(name)!=mapS.end())
+      return tokenizeD(mapS.find(name)->second);
+   return vector<double>();
+}//}}}
+// SET {{{
+void ArgumentParser::updateS(const string &name, const string &value){
+   if(!existsOption(name))error("ArgumentParser: argument name %s unknown.\n",name.c_str());
+   if(mapS.find(name)!=mapS.end())
+      mapS.find(name)->second = value;
+}//}}}
// Parse the command line: fills the positional 'arguments' vector and the
// mapS/mapL/mapD/mapB value maps. Returns false when help was requested,
// an option was unknown, or a compulsory option/argument count is missing.
bool ArgumentParser::parse(int argc,char * argv[]){//{{{
//   for(long i=0;i<argc;i++)message("_%s_\n",(args[i]).c_str());
   // add verbose if  possible {{{
   if(! (existsName("v")||existsName("verbose")||existsOption("verbose")))
      addOptionB("v","verbose","verbose",0,"Verbose output.");
   //if(! (existsName("V")||existsName("veryVerbose")||existsOption("veryVerbose")))
   //   addOptionB("V","veryVerbose","veryVerbose",0,"Very verbose output.");
   // }}}
   programName=(string)argv[0];
   string val,opt;
   for(long i = 1; i<argc;i++){
      val=(string)argv[i];
      // Anything not starting with '-' is a positional argument.
      if(val[0]!='-'){
         arguments.push_back(val);
         continue;
      }
      // Short option "-x"; otherwise long option "--name" or "--name=value".
      if(Sof(val)==2){
         opt=val.substr(1,1);
         val="";
      }else{
         if(val.find("=")!=string::npos){
            opt=val.substr(2,val.find("=")-2);
            val=val.substr(val.find("=")+1);
         }else{
            opt=val.substr(2);
            val="";
         }
      }
      if((opt=="help")||(opt=="h")){
         usage();
         return false;
      }
      if(names.find(opt)==names.end()){
         error("Unknown option '%s'.\n",argv[i]);
         return false;
      }
      // Non-flag options consume a value: either after '=' or the next argv.
      if(validOptions[names[opt]].type!=OTBOOL){
         if(val==""){
            i++;
            // NOTE(review): a trailing option with no value is silently
            // dropped here — confirm this is intended.
            if(i==argc)break;
            val = argv[i];
         }
         switch(validOptions[names[opt]].type){
            case OTSTRING:
               mapS[names[opt]]=val;
               break;
            case OTLONG:
               mapL[names[opt]]=atoi(val.c_str());
               break;
            case OTDOUBLE:
               mapD[names[opt]]=atof(val.c_str());
               break;
            case OTBOOL:;
         }
      }else{
         // Flags toggle their current (default) value.
         mapB[names[opt]]=!mapB[names[opt]];
      }
   }
   //writeAll();
   if(Sof(arguments)<minimumArguments){
      error("Need at least %ld arguments.\n\n",minimumArguments);
      usage();
      return false;
   }
   // Verify that every compulsory option was supplied.
   for(long i = 0;i<Sof(compulsory);i++){
      if(! isSet(compulsory[i])){
         error("Missing option \"%s\"\n",(compulsory[i]).c_str());
         usage();
         return false;
      }
   }
   // set public variable verbose 
   verbose = flag("verbose")||(existsOption("veryVerbose")&&flag("veryVerbose"));
   return true;
}//}}}
+void ArgumentParser::writeAll(){//{{{
+   message("arguments: ");
+   for(long i=0;i<Sof(arguments);i++)
+      message("%s ",(arguments[i]).c_str());
+   message("\n");
+   for(map<string,string>::iterator it=mapS.begin();it!=mapS.end();it++){
+      message("OPT:%s VAL:%s\n",(it->FF).c_str(),(it->SS).c_str());
+   }
+   for(map<string,long>::iterator it=mapL.begin();it!=mapL.end();it++){
+      message("OPT:%s VAL:%ld\n",(it->FF).c_str(),it->SS);
+   }
+   for(map<string,double>::iterator it=mapD.begin();it!=mapD.end();it++){
+      message("OPT:%s VAL:%lf\n",(it->FF).c_str(),(it->SS));
+   }
+   for(map<string,bool>::iterator it=mapB.begin();it!=mapB.end();it++){
+      message("OPT:%s VAL:%d\n",(it->FF).c_str(),(it->SS));
+   }
+}//}}}
+void ArgumentParser::addOptionL(const string &shortName,const string &longName, const string &name, bool comp, const string &description, long defValue){//{{{
+   Option newOpt;
+   if(existsOption(name)){
+      error("ArgumentParser: Option \"%s\"\n",(name).c_str());
+      return;
+   }
+   newOpt.type=OTLONG;
+   newOpt.shortName=shortName;
+   newOpt.longName=longName;
+   newOpt.description=description;
+   if(defValue!=-47){
+      appendDescription<long>(&newOpt.description,defValue);
+      mapL[name]=defValue;
+   }
+   validOptions[name]=newOpt;
+   if(shortName!="")names[shortName]=name;
+   if(longName!="")names[longName]=name;
+   if(comp)compulsory.push_back(name);
+}//}}}
+void ArgumentParser::addOptionD(const string &shortName,const string &longName, const string &name, bool comp, const string &description, double defValue){//{{{
+   Option newOpt;
+   if(existsOption(name)){
+      error("ArgumentParser: Option \"%s\"\n",(name).c_str());
+      return;
+   }
+   newOpt.type=OTDOUBLE;
+   newOpt.shortName=shortName;
+   newOpt.longName=longName;
+   newOpt.description=description;
+   if(defValue!=-47.47){
+      appendDescription<double>(&newOpt.description,defValue);
+      mapD[name]=defValue;
+   }
+   validOptions[name]=newOpt;
+   if(shortName!="")names[shortName]=name;
+   if(longName!="")names[longName]=name;
+   if(comp)compulsory.push_back(name);
+}//}}}
+void ArgumentParser::addOptionB(const string &shortName,const string &longName, const string &name, bool comp, const string &description, bool defValue){//{{{
+   Option newOpt;
+   if(existsOption(name)){
+      error("ArgumentParser: Option \"%s\"\n",(name).c_str());
+      return;
+   }
+   mapB[name]=defValue;
+   newOpt.type=OTBOOL;
+   newOpt.shortName=shortName;
+   newOpt.longName=longName;
+   newOpt.description=description;
+   if(defValue) newOpt.description +=" (default: On)";
+   else newOpt.description+=" (default: Off)";
+   validOptions[name]=newOpt;
+   if(shortName!="")names[shortName]=name;
+   if(longName!="")names[longName]=name;
+   if(comp)compulsory.push_back(name);
+}//}}}
+void ArgumentParser::addOptionS(const string &shortName,const string &longName, const string &name, bool comp, const string &description, const string &defValue){//{{{
+   Option newOpt;
+   if(existsOption(name)){
+      error("ArgumentParser: Option \"%s\"\n",(name).c_str());
+      return;
+   }
+   newOpt.type=OTSTRING;
+   newOpt.shortName=shortName;
+   newOpt.longName=longName;
+   newOpt.description=description;
+   if(defValue!="noDefault"){
+      appendDescription<string>(&newOpt.description,defValue);
+      mapS[name]=defValue;
+   }
+   validOptions[name]=newOpt;
+   if(shortName!="")names[shortName]=name;
+   if(longName!="")names[longName]=name;
+   if(comp)compulsory.push_back(name);
+}//}}}
+//{{{ void ArgumentParser::appendDescription(string &desc,valueType defValue)
+template <typename valueType>
+void ArgumentParser::appendDescription(string *desc,valueType defValue){
+   stringstream descStream;
+   descStream<<*desc<<" (default: "<<defValue<<")";
+   *desc = descStream.str();
+}//}}}
// Print the usage message: program name, compulsory options first (sorted),
// then every registered option with its short/long form and description.
void ArgumentParser::usage(){//{{{
   map<string,Option>::iterator it;
   vector<string>::iterator itV;
   Option opt;
   message("\nUsage: %s ",(programName).c_str());
   sort(compulsory.begin(),compulsory.end());
   for(itV=compulsory.begin();itV!=compulsory.end();itV++){
      // Prefer the short form when one is registered.
      if(validOptions[*itV].shortName!="")
         message("-%s ",(validOptions[*itV].shortName).c_str());
      else
         message("--%s ",(validOptions[*itV].longName).c_str());
      // Flags take no value placeholder.
      if(validOptions[*itV].type!=OTBOOL)message("<%s> ",(*itV).c_str());
   }
   message(" [OPTIONS] %s\n",(argumentDesc).c_str());
   message("\n%s\n\nOptions:\n",(programDesc).c_str());
   message("  --help\n    Show this help information.\n\n");
   // List every option: "-s <name> ,   --long=<name>" followed by description.
   for(it=validOptions.begin();it!=validOptions.end();it++){
      opt=it->SS;
      message("  ");
      if(opt.shortName!=""){
         message("-%s",(opt.shortName).c_str());
         if(opt.type!=OTBOOL)message(" <%s>",(it->FF).c_str());
         if(opt.longName!="")message(" ,   ");
      }
      if(opt.longName!=""){
         message("--%s",(opt.longName).c_str());
         if(opt.type!=OTBOOL)message("=<%s>",(it->FF).c_str());
      }
      message("\n");
      if(opt.description!=""){
         message("    %s\n\n",(opt.description).c_str());
      }
   }
}//}}}
+bool ArgumentParser::isSet(const string &name) const {//{{{
+   if(! existsOption(name,true))return false;
+   switch(validOptions.find(name)->second.type){
+      case OTSTRING:
+         if(mapS.find(name)==mapS.end())return false;
+         else return true;
+      case OTLONG:
+         if(mapL.find(name)==mapL.end())return false;
+         else return true;
+      case OTBOOL:
+         if(mapB.find(name)==mapB.end())return false;
+         else return mapB.find(name)->second;
+      case OTDOUBLE:
+         if(mapD.find(name)==mapD.end())return false;
+         else return true;
+   }
+   return false;
+}//}}}
+bool ArgumentParser::existsName(const string &name) const {//{{{
+   if(names.find(name)==names.end())return false;
+   return true;
+}//}}}
+bool ArgumentParser::existsOption(const string &name, bool warn) const {//{{{
+   if(validOptions.find(name)!=validOptions.end())return true;
+   if(warn)error("ArgumentParser: argument name %s unknown.\n",(name).c_str());
+   return false;
+}//}}}
diff --git a/ArgumentParser.h b/ArgumentParser.h
new file mode 100644
index 0000000..aaaea94
--- /dev/null
+++ b/ArgumentParser.h
@@ -0,0 +1,110 @@
+#ifndef ARGUMENTPARSER_H
+#define ARGUMENTPARSER_H
+
+#include<map>
+#include<string>
+#include<vector>
+
+using namespace std;
+
// Value type of a command-line option (string / long / boolean flag / double).
enum OptionType {OTSTRING, OTLONG, OTBOOL, OTDOUBLE};
// Metadata of one registered option.
struct Option{//{{{
   OptionType type;
   string shortName,longName,description;
};//}}}
+
// Command-line argument parser: options are registered with addOption*(),
// parse() scans argv, and values are retrieved via get*()/flag(); positional
// (non-option) arguments are collected and exposed through args().
class ArgumentParser{
   private:
      // Parsed option values keyed by canonical option name, one map per type.
      map<string,string> mapS;
      map<string,long> mapL;
      map<string,bool> mapB;
      map<string,double> mapD;
      // Maps each short/long command-line name to the canonical option name.
      map<string,string> names;
      // All registered options keyed by canonical name.
      map<string,Option> validOptions;
      string programName, argumentDesc, programDesc;
      // Positional arguments in the order they appeared on the command line.
      vector<string> arguments;
      // Canonical names of options that must be provided.
      vector<string> compulsory;
      // Minimum number of positional arguments required by parse().
      long minimumArguments;

      bool existsOption(const string &name, bool warn = false) const;
      bool existsName(const string &name) const;
      template <typename valueType>
      void appendDescription(string *desc,valueType defValue);
   public:
      // The value of verbose option for direct access.
      bool verbose;

      // Constructor for the class sets: programDescription, additional string
      // and minimum number of required arguments.
      ArgumentParser(const string &pD="",const string &aD="[FILES]", long minArgs = 1){//{{{
         verbose = false;
         init(pD,aD,minArgs);
      }//}}}
      // Init function for initialization, sets the same values as constructor.
      void init(const string &pD="",const string &aD="[FILES]", long minArgs = 1){//{{{
         programDesc=pD; 
         argumentDesc=aD; 
         minimumArguments = minArgs;
      }//}}}
      // Parse function given number of arguments and array of arguments 
      // it processes the arguments and makes options available through 
      // get[S/L/D] functions and args() function.
      bool parse(int n,char * argv[]); 
      /*
       * SETTERS:
       */
      // Add option (string) adds new option, name is the name used for referring 
      // to it.
      void addOptionS(const string &shortName,
                      const string &longName,
                      const string &name,
                      bool comp,
                      const string &description="", 
                      const string &defValue="noDefault");
      // Add option (long).
      void addOptionL(const string &shortName, const string &longName,
                      const string &name, bool comp, const string &description="",
                      long defValue=-47);
      // Add option (double).
      void addOptionD(const string &shortName, const string &longName,
                      const string &name, bool comp, const string &description="",
                      double defValue=-47.47);
      // Add option (boolean or 'flag').
      void addOptionB(const string &shortName, const string &longName, 
                      const string &name, bool comp, const string &description="",
                      bool defValue=false);
      /*
       * GETTERS:
       */
      // Return reference to vector of arguments
      // (i.e. the strings provided with no -/-- modifier).
      const vector<string>& args() const { return arguments; }
      // Return true if option <name> was set.
      bool isSet(const string &name) const;
      // Return value of string option <name>.
      string getS(const string &name) const;
      // Return value of string option <name> in lower case.
      string getLowerS(const string &name) const;
      // Return value of integer option <name>.
      long getL(const string &name) const;
      // Return value of double option <name>.
      double getD(const string &name) const;
      // Return value of bool option <name>.
      bool flag(const string &name) const;
      // Return value of verbose.
      bool verb() const { return verbose; }

      /*
       * OTHER:
       */
      // (Advanced get) Return tokenized (comma separated) string as vector of doubles.
      vector<double> getTokenizedS2D(const string &name) const;
      // Write usage string.
      void usage();
      // Write all options.
      void writeAll();
      // Update value of existing string option.
      void updateS(const string &name, const string &value);
};
+
+#endif
diff --git a/CollapsedSampler.cpp b/CollapsedSampler.cpp
new file mode 100644
index 0000000..6d3c131
--- /dev/null
+++ b/CollapsedSampler.cpp
@@ -0,0 +1,95 @@
+#ifdef DoSTATS
+#include<sys/time.h>
+#endif
+
+#include "CollapsedSampler.h"
+#include "common.h"
+
// Resample the transcript assignment Z[i] of every read from its collapsed
// conditional distribution, keeping the per-transcript counts C[] consistent
// with Z. On the first call (Z not sized to Nmap yet) assignments are
// initialized uniformly at random.
void CollapsedSampler::sampleZ(){//{{{
   int_least32_t i,j,k;
   // Resize Z and initialize if not big enough. {{{
   if((long)Z.size() != Nmap){
      Z.assign(Nmap,0);
      // init Z&C
      for(i=0;i<Nmap;i++){
         //choose random transcript;
         k = (int_least32_t) (m * uniformDistribution(rng_mt));
         Z[i]=k;
         C[k]++;
      }
   }//}}}
   // TimeStats {{{
#ifdef DoSTATS
   nZ++;
   struct timeval start, end;
   gettimeofday(&start, NULL);
#endif
   // }}}
   vector<double> phi(m,0); 
   // phi of size M should be enough 
   // because of summing the probabilities for each isoform when reading the data
   double probNorm,r,sum,const1a,const1b,const2a;
   int_least32_t readsAlignmentsN;

   // Precomputed constants of the conditional posterior. Transcript 0 appears
   // to act as the "noise" transcript governed by the beta prior, while the
   // others share a symmetric Dirichlet(dir->alpha) prior — TODO confirm
   // against the prior definitions in Sampler.
   const1a = beta->beta + Nunmap;
   const1b = m * dir->alpha + Nmap - 1;
   const2a = beta->alpha + Nmap - 1;
   // randomize order: ???
   for(i=0;i<Nmap;i++){
      probNorm=0;
      C[Z[i]]--; // use counts without the current one 
      // Alignments of read i are stored consecutively in 'alignments'.
      readsAlignmentsN = alignments->getReadsI(i+1) - alignments->getReadsI(i);
      for(j=0, k=alignments->getReadsI(i); j<readsAlignmentsN; j++, k++){
         //message("%ld %lf ",(*alignments)[k].getTrId(),(*alignments)[k].getProb());
         if(alignments->getTrId(k) == 0){
            phi[j] = alignments->getProb(k) *
               (const1a + C[0]) *
               (const1b - C[0]); // this comes from division in "false part"
         }else{
            phi[j] = alignments->getProb(k) *
               (const2a - C[0]) *
               (dir->alpha + C[ alignments->getTrId(k) ]); 
               /* 
               /(m * dir->alpha + Nmap - 1 - C[0]) ;
               this term was replaced by *(const1b - C[0]) 
               and moved into "true part" as multiplication 
               */
         }
         probNorm += phi[j];
      }
      // Inverse-CDF draw: pick the first alignment whose cumulative phi
      // exceeds r, with r uniform in [0, probNorm).
      r = uniformDistribution(rng_mt);
      // Apply Normalization constant:
      r *= probNorm;
      for(j = 0, sum = 0 ; (sum<r) && (j<readsAlignmentsN); j++){
         sum += phi[j];
      }
      if(j==0){
         // e.g. if probNorm == 0
         // assign to noise.
         Z[i] = 0;
      } else {
         // After the loop j is one past the chosen alignment, hence the -1.
         Z[i] = alignments->getTrId(alignments->getReadsI(i) + j -1);
      }
      C[ Z[i] ]++;
   }
   // TimeStats {{{
#ifdef DoSTATS
   gettimeofday(&end, NULL);
   tZ += (end.tv_sec-start.tv_sec)*1000*1000+(end.tv_usec-start.tv_usec);
#endif
   // }}}
}//}}}
+
// One update step: base-class bookkeeping, then resample theta and refresh
// the running sums; optionally append the current state to the log file.
void CollapsedSampler::update(){//{{{
   Sampler::update();

   sampleTheta();

   updateSums();
   if((doLog)&&(save))appendFile();
}//}}}
// Produce a new state: base-class sampling step followed by re-assignment
// of all reads to transcripts.
void CollapsedSampler::sample(){//{{{
   Sampler::sample();

   sampleZ();
}//}}}
diff --git a/CollapsedSampler.h b/CollapsedSampler.h
new file mode 100644
index 0000000..eb8c56d
--- /dev/null
+++ b/CollapsedSampler.h
@@ -0,0 +1,16 @@
+#include<stdint.h>
+
+#include "Sampler.h"
+
+class CollapsedSampler : public Sampler{
+   private:
+   vector<int_least32_t> Z;
+
+   void sampleZ();
+
+   public:
+
+   virtual void update();
+   virtual void sample();
+   
+};
diff --git a/FileHeader.cpp b/FileHeader.cpp
new file mode 100644
index 0000000..d40f58c
--- /dev/null
+++ b/FileHeader.cpp
@@ -0,0 +1,117 @@
+#include<cstdlib>
+
+#include "FileHeader.h"
+#include "misc.h"
+
+#include "common.h"
+
+using namespace ns_fileHeader;
+
+void FileHeader::skipEmptyLines() {//{{{
+   if(!file) return;
+   while(file->good() &&
+         ((file->peek() == ' ') ||
+          (file->peek() == '\n')))
+      file->get();
+}//}}}
+
// Read all leading '#' header lines from 'file' and record every word as a
// flag in 'values'; when a word is followed by a number, that number becomes
// the flag's value (otherwise no_value). When outF is given, header lines
// are copied to it verbatim.
bool FileHeader::readValues(ofstream *outF){//{{{
   if((file==NULL)||(!file->is_open())){
      error("FileHeader: Input file not opened for reading.\n");
      return false;
   }
   string line;
   vector<string> words;
   long value;
   char *chP;
   skipEmptyLines();
   while(file->good() && (file->peek() == '#')){
      // Read line.
      getline(*file, line);
      // If outF is defined, copy the header there.
      if(outF!=NULL)(*outF)<<line<<endl;
      skipEmptyLines();
      // Tokenize line into words.
      words = ns_misc::tokenize(line);
      // Store words as flags. Start with 1st word as the 0th one are hashes.
      // If word is followed by a numeric value, use it as a value for the flag.
      for(long i=1;i<(long)words.size();i++){
         // Only add new entry if it wasn't there already.
         if(values.count(words[i])==0)
            values[words[i]] = no_value;
         // See if next word is numeric and if so, then use it as a value.
         if(i+1<(long)words.size()){
            value = strtol(words[i+1].c_str(), &chP, 10);
            // Conversion was successful when the value is non-zero OR the
            // end pointer reached the end of the string (null character).
            if((value!=0)||(*chP=='\0')) {
               // Save value and skip the number.
               values[words[i]] = value;
               i++;
            }
         }
      }
   }
   return true;
}//}}}
+
+bool FileHeader::samplesHeader(long *n, long *m, bool *transposed, bool *logged){//{{{
+   if(!readValues()){
+      *n=0;
+      *m=0;
+      return false;
+   }
+   if(logged!=NULL)if(values.count("L"))*logged = true;
+   if(values.count("T"))*transposed = true;
+   if(values.count("M") && (values["M"]!=no_value))*m = values["M"];
+   if(values.count("N") && (values["N"]!=no_value))*n = values["N"];
+   return true;
+}//}}}
+
+bool FileHeader::transcriptsHeader(long *m, long *colN){//{{{
+   if(!readValues()){
+      *m=0;
+      return false;
+   }
+   if(values.count("M") && (values["M"]!=no_value))*m = values["M"];
+   if(colN!=NULL)
+      if(values.count("colN") && (values["colN"]!=no_value))*colN = values["colN"];
+   return true;
+}//}}}
+
+bool FileHeader::probHeader(long *Nmap, long *Ntotal, long *M, AlignmentFileType *format){//{{{
+   if(!readValues()){
+      *M=0;
+      *Nmap=0;
+      return false;
+   }
+   if(values.count("LOGFORMAT")){*format = LOG_FORMAT;}
+   else if(values.count("NEWFORMAT")){*format = NEW_FORMAT;}
+   else *format = OLD_FORMAT;
+   if(values.count("Ntotal") && (values["Ntotal"]!=no_value))*Ntotal = values["Ntotal"];
+   if(values.count("Nmap") && (values["Nmap"]!=no_value))*Nmap = values["Nmap"];
+   if(values.count("M") && (values["M"]!=no_value))*M = values["M"];
+   return true;
+}//}}}
+
+bool FileHeader::varianceHeader(long *m,bool *logged){//{{{
+   if(!readValues()){
+      *m=0;
+      return false;
+   }
+   if(logged!=NULL)if(values.count("L"))*logged = true;
+   if(values.count("M") && (values["M"]!=no_value))*m = values["M"];
+   return true;
+}//}}}
+
+bool FileHeader::paramsHeader(long *parN, ofstream *outF){//{{{
+   if(!readValues(outF)){
+      *parN=0;
+      return false;
+   }
+   *parN = 0;
+   if(values.count("PN") && (values["PN"]!=no_value))*parN = values["PN"];
+   return true;
+}//}}}
+   
+
+
diff --git a/FileHeader.h b/FileHeader.h
new file mode 100644
index 0000000..e1ae93f
--- /dev/null
+++ b/FileHeader.h
@@ -0,0 +1,45 @@
#ifndef FILEHEADER_H
#define FILEHEADER_H

#include<fstream>
#include<map>
#include<vector>

using namespace std;

// Sentinel meaning "flag present but without a numeric value".
const long no_value = -4747;

namespace ns_fileHeader {
enum AlignmentFileType { OLD_FORMAT, NEW_FORMAT, LOG_FORMAT };
} // namespace ns_fileHeader

// FileHeader class parses file headers (lines starting with # at the beginning of the file).
// Every word (space separated string) is considered a possible FLAG.
// If a FLAG is followed by a numeric value, then the value is stored as the FLAG's value.
// The individual functions then just look whether FLAG was present, and in case of integers, whether it had some value assigned to it.
class FileHeader {
 private:
   // Non-owning pointer to the stream being parsed; may be NULL.
   ifstream *file;
   // FLAG -> value (no_value when the flag carried no number).
   map<string,long> values;
   bool readValues(ofstream *outF = NULL);

   void skipEmptyLines();
 public:
   FileHeader(ifstream *f = NULL) {
      file = f;
   }
   void setFile(ifstream *f){
      file = f;
   }
   // Close the underlying stream and forget it. Fix: previously dereferenced
   // a NULL 'file' when no stream had been set; now a no-op in that case.
   void close(){
      if(file!=NULL){
         file->close();
         file=NULL;
      }
   }
   bool samplesHeader(long *n, long *m, bool *transposed, bool *logged = NULL);
   bool transcriptsHeader(long *m, long *colN);
   bool probHeader(long *Nmap, long *Ntotal, long *M, ns_fileHeader::AlignmentFileType *format);
   bool varianceHeader(long *m, bool *logged);
   bool paramsHeader(long *parN, ofstream *outF);
};

#endif
diff --git a/GibbsParameters.cpp b/GibbsParameters.cpp
new file mode 100644
index 0000000..82479b4
--- /dev/null
+++ b/GibbsParameters.cpp
@@ -0,0 +1,110 @@
+#include<fstream>
+
+using namespace std;
+
+#include "GibbsParameters.h"
+#include "common.h"
+
+#define DEBUGGP(x) 
+#define Sof(x) (long)x.size()
+
+
+/*void gibbsParameters::setLogFiles(string tau,string tauMeans){//{{{
+   gs_samplesFile=tau;
+   gs_meansFile=tauMeans; 
+}//}}}*/
// Print all current parameter values via message() (diagnostic output).
void gibbsParameters::getAllParameters(){//{{{
   message("Parameters:\n   burnIn: %ld\
\n   samplesN: %ld\n   samplesSave: %ld\
\n   samplesNmax: %ld\n   chainsN: %ld\
\n   targetScaleReduction: %lf\n   dirAlpha: %lf\
\n   dirBeta: %lf\n   betaAlpha: %lf\n   betaBeta: %lf\n",
gs_burnIn,gs_samplesN,gs_samplesSave,gs_samplesNmax,gs_chainsN,gs_targetScaleReduction,dirP.alpha,dirP.beta,betaP.alpha,betaP.beta);
}//}}}
+bool gibbsParameters::setParameters(string paramFileName){//{{{
+   this->paramFileName = paramFileName;
+   return readParameters();
+}//}}}
+bool gibbsParameters::setParameters(ArgumentParser &args){//{{{
+   if(args.isSet("MCMC_burnIn"))gs_burnIn=args.getL("MCMC_burnIn");
+   if(args.isSet("MCMC_samplesN"))gs_samplesN=args.getL("MCMC_samplesN");
+   if(args.isSet("MCMC_samplesSave"))gs_samplesSave=args.getL("MCMC_samplesSave");
+   if(args.isSet("MCMC_samplesNmax"))gs_samplesNmax=args.getL("MCMC_samplesNmax");
+   if(args.isSet("MCMC_chainsN"))gs_chainsN=args.getL("MCMC_chainsN");
+   if(args.isSet("MCMC_scaleReduction"))gs_targetScaleReduction=args.getD("MCMC_scaleReduction");
+   if(args.isSet("MCMC_dirAlpha"))dirP.alpha=args.getD("MCMC_dirAlpha");
+   return true;
+}//}}}
// (Re)read parameters from 'paramFileName'. Lines have the form
// "<name> <value>"; '#' starts a comment and unknown names are ignored.
// Always returns true (a missing file simply leaves the defaults).
bool gibbsParameters::readParameters(){//{{{
   ifstream pFile;
   string param;
   double val;
   char tmp[256];  // scratch buffer for discarding the rest of a line
   pFile.open(paramFileName.c_str());
   while((pFile.is_open())&&(! pFile.eof())){
      // Skip empty lines and comments.
      if((! (pFile>>param)) || (Sof(param)==0) || (param[0]=='#')){
         pFile.getline(tmp,256);
         continue;
      }
      pFile>>val;
      if(pFile.good()){
         DEBUGGP(message("# DEBUG gPar ||%s==%lf||\n",(param).c_str(),val);)
         if(param=="burnIn")parameter("burnIn",gs_burnIn,val);
         if(param=="samplesN")parameter("samplesN",gs_samplesN,val);
         if(param=="samplesSave")parameter("samplesSave",gs_samplesSave,val);
         if(param=="samplesNmax")parameter("samplesNmax",gs_samplesNmax,val);
         if(param=="chainsN")parameter("chainsN",gs_chainsN,val);
         if(param=="targetScaleReduction")parameter("targetScaleReduction",gs_targetScaleReduction,val);
         if(param=="dirAlpha")parameter("dirAlpha",dirP.alpha,val);
         if(param=="dirBeta")parameter("dirBeta",dirP.beta,val);
         if(param=="betaAlpha")parameter("betaAlpha",betaP.alpha,val);
         if(param=="betaBeta")parameter("betaBeta",betaP.beta,val);
         //if(param=="output")parameter("output",gs_output,val);
      }
      // Discard the remainder of the line.
      pFile.getline(tmp,256);
   }
   //if(gs_samplesN>gs_samplesNmax)gs_samplesNmax=gs_samplesN;
   pFile.close();
   return true;
}//}}}
+void gibbsParameters::parameter(string name, double &variable, double value){//{{{
+   bool output=false;
+   if(verbose && (variable != value))output = true;
+   variable = value;
+   if(output){
+      message("### %s: %lf\n",(name).c_str(),variable);
+   }
+}//}}}
+void gibbsParameters::parameter(string name, long &variable, double value){//{{{
+   bool output=false;
+   if(verbose && (variable != (long) value))output = true;
+   variable = (long) value;
+   if(output){
+      message("### %s: %ld\n",(name).c_str(),variable);
+   }
+}//}}}
+void gibbsParameters::parameter(string name, bool &variable, double value){//{{{
+   bool output=false;
+   if(verbose && (variable !=(bool)((long) value)))output = true;
+   variable = (bool)((long)value);
+   if(output){
+      message("### %s: %d\n",(name).c_str(),variable);
+   }
+}//}}}
+
// Construct with built-in defaults; setParameters()/readParameters() may
// override any of them later.
gibbsParameters::gibbsParameters(bool verbose){//{{{
   this->verbose = verbose;
   gs_burnIn=1000;
   gs_samplesN=1000;
   gs_samplesNmax=50000;
   gs_samplesSave=500;
   gs_chainsN=4;
   gs_targetScaleReduction=1.2;
   // Prior parameters — presumably Dirichlet (dirP) and Beta (betaP) priors
   // used by the sampler; TODO confirm against Sampler's usage.
   dirP.alpha=1;
   dirP.beta=1;
   betaP.alpha=10;
   betaP.beta=2;
   gs_samplesFile="gibbs_log.rpkmS";
   gs_meansFile="gibbs_log.thetaMeans";
   //gs_output=RPKM;
}//}}}
diff --git a/GibbsParameters.h b/GibbsParameters.h
new file mode 100644
index 0000000..372b51c
--- /dev/null
+++ b/GibbsParameters.h
@@ -0,0 +1,45 @@
+#ifndef GIBBSPARAMETERS_H
+#define GIBBSPARAMETERS_H
+
+#include<string>
+
+// NOTE(review): using-directive in a header leaks std into every includer;
+// kept for source compatibility with the rest of the project.
+using namespace std;
+
+#include "ArgumentParser.h"
+
+// Parameters of a two-parameter distribution (Dirichlet or Beta prior).
+struct distributionParameters{//{{{
+   double alpha,beta;
+};//}}}
+
+// Holds the MCMC configuration (burn-in, sample counts, chains, convergence
+// target and prior hyper-parameters) read from a parameter file or arguments.
+class gibbsParameters{
+   private:
+      long gs_burnIn, gs_samplesN, gs_chainsN, gs_samplesNmax, gs_samplesSave;
+      double gs_targetScaleReduction;
+      bool verbose;
+      distributionParameters dirP, betaP;
+      string gs_samplesFile,gs_meansFile,paramFileName;
+      // Typed setters used while parsing the parameter file; they log changes
+      // when verbose.
+      void parameter(string name, bool &variable, double value);
+      void parameter(string name, long &variable, double value);
+      void parameter(string name, double &variable, double value);
+   public:
+      gibbsParameters(bool verbose = true);
+      bool setParameters(string paramFileName);
+      bool setParameters(ArgumentParser &args);
+      // Re-read paramFileName; returns false on I/O failure.
+      bool readParameters();
+      void getAllParameters();
+      long burnIn() const {return gs_burnIn;}
+      long samplesN() const {return gs_samplesN;}
+      long samplesSave() const {return gs_samplesSave;}
+      long samplesNmax() const {return gs_samplesNmax;}
+      long chainsN() const {return gs_chainsN;}
+      const distributionParameters& dir() const {return dirP;}
+      const distributionParameters& beta()const {return betaP;}
+      double targetScaleReduction() const {return gs_targetScaleReduction;}
+//      string samplesFile() const {return gs_samplesFile;}
+//      string meansFile() const {return gs_meansFile;}
+//      void setLogFiles(string tau,string tauMeans);
+//      outputType output() const {return (outputType)gs_output;}
+};
+
+
+#endif
diff --git a/GibbsSampler.cpp b/GibbsSampler.cpp
new file mode 100644
index 0000000..7e00e9e
--- /dev/null
+++ b/GibbsSampler.cpp
@@ -0,0 +1,97 @@
+#ifdef DoSTATS
+#include<sys/time.h>
+#endif
+
+#include "GibbsSampler.h"
+#include "common.h"
+
+// Constructor: start with the noise proportion theta_act at zero.
+GibbsSampler::GibbsSampler(){ //{{{
+   thetaAct=0;
+}//}}}
+// Sample read-to-transcript assignments Z given current theta and thetaAct,
+// accumulating per-transcript counts in C. For each read, the probability of
+// each of its alignments is weighted by the transcript's expression (or by
+// 1-thetaAct for the noise transcript 0), then one alignment is drawn by
+// inverse-CDF sampling.
+void GibbsSampler::sampleZ(){//{{{
+   // TimeStats {{{
+#ifdef DoSTATS
+   nZ++;
+   struct timeval start, end;
+   gettimeofday(&start, NULL);
+#endif
+   // }}}
+   long i,j,k;
+   vector<double> phi(m,0); 
+   // phi of size M should be enough 
+   // because of summing the probabilities for each isoform when reading the data
+   double probNorm,r,sum;
+   int_least32_t readsAlignmentsN;
+
+   // Reset C to zeros.
+   C.assign(C.size(),0);
+   // Assign reads.
+   for(i=0;i<Nmap;i++){
+      probNorm=0;
+      readsAlignmentsN = alignments->getReadsI(i+1) - alignments->getReadsI(i);
+      for(j=0, k=alignments->getReadsI(i); j < readsAlignmentsN; j++, k++){
+         if(alignments->getTrId(k) == 0){
+            // Noise transcript: weighted by the non-active proportion.
+            phi[j] = alignments->getProb(k) * (1 - thetaAct);
+         }else{
+            phi[j] = alignments->getProb(k) *
+               thetaAct * theta[alignments->getTrId(k)];
+         }
+         probNorm += phi[j];
+      }
+      r = uniformDistribution(rng_mt);
+      // Apply Normalization constant:
+      r *= probNorm;
+      // Inverse-CDF draw over the read's alignments.
+      for(j = 0, sum = 0 ; (sum<r) && (j<readsAlignmentsN); j++){
+         sum += phi[j];
+      }
+      if(j==0){
+         // e.g. if probNorm == 0
+         // assign to noise.
+         C[0]++;
+      }else{
+         // Assign to the chosen transcript.
+         C[ alignments->getTrId( alignments->getReadsI(i)+j-1 ) ]++;
+      }
+   }
+   // TimeStats {{{
+#ifdef DoSTATS
+   gettimeofday(&end, NULL);
+   tZ += (end.tv_sec-start.tv_sec)*1000*1000+(end.tv_usec-start.tv_usec);
+#endif
+   // }}}
+}//}}}
+// Sample the active (non-noise) proportion thetaAct from its Beta posterior.
+// Uses the standard construction Beta(a,b) = X/(X+Y) with X~Gamma(a,1),
+// Y~Gamma(b,1); a = betaAlpha + (reads not in noise), b = betaBeta + C0.
+void GibbsSampler::sampleThetaAct(){//{{{
+#ifdef DoSTATS
+   nTa++;
+   struct timeval start, end;
+   gettimeofday(&start, NULL);
+#endif
+   double C0=C[0]+Nunmap,X,Y; 
+   // counting C_0 from all reads
+   // generate thetaAct~Beta(a,b) as thetaAct = X/(X+Y) ; X~Gamma(a,1), Y~Gamma(b,1)
+   gammaDistribution.param(gDP(beta->alpha + Nmap+Nunmap - C0, 1));
+   X = gammaDistribution(rng_mt);
+   gammaDistribution.param(gDP(beta->beta + C0, 1));
+   Y = gammaDistribution(rng_mt);
+   
+   thetaAct = X / (X+Y);
+#ifdef DoSTATS
+   gettimeofday(&end, NULL);
+   tTa += (end.tv_sec-start.tv_sec)*1000*1000+(end.tv_usec-start.tv_usec);
+#endif
+}//}}}
+// Per-iteration bookkeeping: record thetaAct in theta[0], accumulate running
+// sums, and append the sample to the log file when logging is enabled.
+void GibbsSampler::update(){//{{{
+   Sampler::update();
+
+   theta[0]=thetaAct; // save thetaAct as theta_0
+
+   updateSums();
+   if((doLog)&&(save))appendFile();
+}//}}}
+// One Gibbs sweep: draw theta, then thetaAct, then the assignments Z.
+void GibbsSampler::sample(){//{{{
+   Sampler::sample();
+
+   sampleTheta();
+   sampleThetaAct();
+   sampleZ();
+}//}}}
diff --git a/GibbsSampler.h b/GibbsSampler.h
new file mode 100644
index 0000000..2eb387b
--- /dev/null
+++ b/GibbsSampler.h
@@ -0,0 +1,17 @@
+#ifndef GIBBSSAMPLER_H
+#define GIBBSSAMPLER_H
+
+#include "Sampler.h"
+
+// Gibbs sampler over read assignments: alternates sampling theta (expression),
+// thetaAct (active proportion) and Z (read-to-transcript assignments).
+// Fix: added the include guard that every other header in this package has;
+// without it, double inclusion redefines the class and fails to compile.
+class GibbsSampler : public Sampler{
+   private:
+   // Proportion of reads assigned to real transcripts (theta_0 complement).
+   double thetaAct;
+
+   void sampleThetaAct();
+   void sampleZ();
+      
+   public:
+
+   GibbsSampler();
+   
+   virtual void update();
+   virtual void sample();
+};
+
+#endif
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..a6d5c11
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,129 @@
+CXX = g++
+ARCH = -mtune=generic
+VERSION = 0.7.5
+#	ARCH = -march=core2
+#	ARCH = -march=native
+
+
+# Use O1 for debugging so it's not totally slow.
+DBGFLAGS = -O1 -ggdb -U_FORTIFY_SOURCE
+COFLAGS = $(ARCH) -O2 -pipe
+CXXFLAGS = -DBS_VERSION=\"$(VERSION)\" -Wall $(COFLAGS)
+# -Wvla does not work with old gcc
+# -ffast-math segfaults with old gcc, don't use.
+LDFLAGS = -Wl,-gc-sections
+BOOSTFLAGS = -I .
+OPENMP = -fopenmp -DSUPPORT_OPENMP
+
+PROGRAMS = \
+   convertSamples \
+   estimateDE \
+   estimateExpression \
+   estimateHyperPar \
+   estimateVBExpression \
+   extractSamples \
+   getFoldChange \
+   getGeneExpression \
+   getPPLR \
+   getVariance \
+   getWithinGeneExpression \
+   parseAlignment \
+   transposeLargeFile
+
+all: $(PROGRAMS)
+
+# Targets that do not produce a file of the same name.
+.PHONY: all clean clean-all
+
+COMMON_DEPS = ArgumentParser.o common.o FileHeader.o misc.o MyTimer.o
+# PROGRAMS:
+convertSamples: convertSamples.cpp $(COMMON_DEPS) TranscriptInfo.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) convertSamples.cpp $(COMMON_DEPS) TranscriptInfo.o -o convertSamples
+
+estimateDE: estimateDE.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(LDFLAGS) estimateDE.cpp $(COMMON_DEPS) PosteriorSamples.o -o estimateDE
+
+estimateExpression: estimateExpression.cpp $(COMMON_DEPS) CollapsedSampler.o GibbsParameters.o GibbsSampler.o Sampler.o TagAlignments.o TranscriptInfo.o transposeFiles.o
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(OPENMP) $(LDFLAGS) estimateExpression.cpp $(COMMON_DEPS) CollapsedSampler.o GibbsParameters.o GibbsSampler.o Sampler.o TagAlignments.o TranscriptInfo.o transposeFiles.o -o estimateExpression
+
+estimateHyperPar: estimateHyperPar.cpp $(COMMON_DEPS) lowess.o PosteriorSamples.o TranscriptExpression.o 
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(LDFLAGS) estimateHyperPar.cpp $(COMMON_DEPS) lowess.o PosteriorSamples.o TranscriptExpression.o -o estimateHyperPar
+
+estimateVBExpression: estimateVBExpression.cpp $(COMMON_DEPS) SimpleSparse.o TagAlignments.o TranscriptInfo.o transposeFiles.o VariationalBayes.o
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(OPENMP) $(LDFLAGS) estimateVBExpression.cpp $(COMMON_DEPS) SimpleSparse.o TagAlignments.o TranscriptInfo.o transposeFiles.o VariationalBayes.o -o estimateVBExpression
+
+extractSamples: extractSamples.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) extractSamples.cpp $(COMMON_DEPS) PosteriorSamples.o -o extractSamples
+
+getFoldChange: getFoldChange.cpp $(COMMON_DEPS) PosteriorSamples.o 
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getFoldChange.cpp $(COMMON_DEPS) PosteriorSamples.o -o getFoldChange
+
+getGeneExpression: getGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o -o getGeneExpression
+
+getPPLR: getPPLR.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getPPLR.cpp $(COMMON_DEPS) PosteriorSamples.o -o getPPLR
+
+getVariance: getVariance.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getVariance.cpp $(COMMON_DEPS) PosteriorSamples.o -o getVariance
+
+getWithinGeneExpression: getWithinGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getWithinGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o -o getWithinGeneExpression
+
+parseAlignment: parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o samtools/sam.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o
+	$(CXX) $(CXXFLAGS) $(OPENMP) $(LDFLAGS) -pthread parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o samtools/*.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o -lz -o parseAlignment
+
+transposeLargeFile: transposeLargeFile.cpp $(COMMON_DEPS) transposeFiles.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) transposeLargeFile.cpp $(COMMON_DEPS) transposeFiles.o -o transposeLargeFile
+
+# LIBRARIES:
+ArgumentParser.o: ArgumentParser.cpp ArgumentParser.h
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c ArgumentParser.cpp
+
+CollapsedSampler.o: CollapsedSampler.cpp CollapsedSampler.h GibbsParameters.h Sampler.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -c CollapsedSampler.cpp
+
+FileHeader.o: common.h misc.h FileHeader.cpp FileHeader.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -ffunction-sections -fdata-sections -c FileHeader.cpp
+
+GibbsSampler.o: GibbsSampler.cpp GibbsSampler.h GibbsParameters.h Sampler.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -c GibbsSampler.cpp
+
+misc.o: ArgumentParser.h PosteriorSamples.h misc.cpp misc.h
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c misc.cpp
+
+MyTimer.o: MyTimer.h MyTimer.cpp
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c MyTimer.cpp
+
+PosteriorSamples.o: PosteriorSamples.cpp PosteriorSamples.h FileHeader.h
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c PosteriorSamples.cpp
+
+ReadDistribution.o: ReadDistribution.cpp ReadDistribution.h TranscriptExpression.h TranscriptInfo.h TranscriptSequence.h 
+	$(CXX) $(CXXFLAGS) $(OPENMP) -c ReadDistribution.cpp
+
+Sampler.o: Sampler.cpp Sampler.h GibbsParameters.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -c Sampler.cpp
+
+SimpleSparse.o: SimpleSparse.cpp SimpleSparse.h
+	$(CXX) $(CXXFLAGS) $(OPENMP) -c SimpleSparse.cpp
+
+VariationalBayes.o: VariationalBayes.cpp VariationalBayes.h SimpleSparse.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(OPENMP) -c VariationalBayes.cpp 
+
+common.o: common.cpp common.h
+GibbsParameters.o: ArgumentParser.h GibbsParameters.cpp GibbsParameters.h
+lowess.o: lowess.cpp lowess.h
+TagAlignments.o: TagAlignments.cpp TagAlignments.h
+TranscriptExpression.o: TranscriptExpression.cpp TranscriptExpression.h
+TranscriptInfo.o: TranscriptInfo.cpp TranscriptInfo.h
+TranscriptSequence.o: TranscriptSequence.cpp TranscriptSequence.h
+transposeFiles.o: transposeFiles.cpp transposeFiles.h FileHeader.h
+
+# EXTERNAL LIBRARIES:
+# Use $(MAKE) so the jobserver and make flags propagate to the sub-make.
+samtools/sam.o:
+	$(MAKE) --directory samtools
+
+# CLEAN:
+# -f: do not fail when there is nothing to remove.
+clean:
+	rm -f *.o $(PROGRAMS)
+
+clean-all:
+	rm -f samtools/*.o *.o $(PROGRAMS)
+
diff --git a/MyTimer.cpp b/MyTimer.cpp
new file mode 100644
index 0000000..7bb34c0
--- /dev/null
+++ b/MyTimer.cpp
@@ -0,0 +1,60 @@
+#include<ctime>
+
+#include "MyTimer.h"
+
+#include "common.h"
+
+// Convert a duration in seconds to minutes ('m') or hours ('h') in place;
+// any other format character leaves the value in seconds.
+void MyTimer::adjust(double &time,char f){//{{{
+   if(f=='m')time/=60.0;
+   if(f=='h')time/=3600.0;
+}//}}}
+// Print the elapsed time with its unit character, unless quiet mode is on.
+void MyTimer::write(double time,char f){//{{{
+   if(!quiet)messageF("[time: +%.2lf %c]\n",time,f);
+}//}}}
+// Constructor: create timer 0 and start it immediately.
+MyTimer::MyTimer(){//{{{
+   N=1;
+   quiet=false;
+   times.resize(N);
+   times[0]=time(NULL);
+}//}}}
+// (Re)start the given timer, growing the timer table on demand.
+void MyTimer::start(long timer){//{{{
+   if(timer>=N){
+      N=timer+1;
+      times.resize(N);
+   }
+   times[timer]=time(NULL);
+}//}}}
+// Report elapsed time for a timer, then restart it (lap time).
+// Returns 0 for an unknown timer id.
+double MyTimer::split(long timer, char f){//{{{
+   if(timer>=N)return 0;
+   double ret;
+   ret=time(NULL)-times[timer];
+   adjust(ret,f);
+   write(ret,f);
+   times[timer]=time(NULL);
+   return ret;
+}//}}}
+// Return elapsed time without printing and without restarting the timer.
+double MyTimer::getTime(long timer, char f){//{{{
+   if(timer>=N)return 0;
+   double ret;
+   ret=time(NULL)-times[timer];
+   adjust(ret,f);
+   return ret;
+}//}}}
+// Report and return elapsed time; the timer keeps running (not reset).
+double MyTimer::current(long timer, char f){//{{{
+   if(timer>=N)return 0;
+   double ret;
+   ret=time(NULL)-times[timer];
+   adjust(ret,f);
+   write(ret,f);
+   return ret;
+}//}}}
+// Report elapsed time and reset the timer. Behaviorally identical to
+// split(); kept separate for readability at call sites.
+double MyTimer::stop(long timer, char f){//{{{
+   if(timer>=N)return 0;
+   double ret;
+   ret=time(NULL)-times[timer];
+   adjust(ret,f);
+   write(ret,f);
+   times[timer]=time(NULL);
+   return ret;
+}//}}}
+
diff --git a/MyTimer.h b/MyTimer.h
new file mode 100644
index 0000000..26d3bcd
--- /dev/null
+++ b/MyTimer.h
@@ -0,0 +1,28 @@
+#ifndef MYTIMER_H
+#define MYTIMER_H
+
+#include<ctime>
+#include<vector>
+
+using namespace std;
+
+// Simple wall-clock stopwatch managing several numbered timers.
+// Fix: include <ctime> explicitly for time_t instead of relying on it
+// arriving transitively through <vector>.
+class MyTimer{
+ private:
+   vector<time_t> times;   // start time of each timer
+   long N;                 // number of timers allocated
+   bool quiet;             // suppress printing when true
+   // Adjust time to format m or h.
+   void adjust(double &time,char f);
+   // Write time in format.
+   void write(double time,char f);
+ public:
+   MyTimer();
+   void setQuiet(){quiet=true;}
+   void setVerbose(){quiet=false;}
+   void start(long timer=0);
+   // f: 's' seconds (default), 'm' minutes, 'h' hours.
+   double split(long timer=0, char f='s');
+   double getTime(long timer=0, char f='s');
+   double current(long timer=0, char f='s');
+   double stop(long timer=0, char f='s');
+};
+
+#endif
diff --git a/PosteriorSamples.cpp b/PosteriorSamples.cpp
new file mode 100644
index 0000000..580478e
--- /dev/null
+++ b/PosteriorSamples.cpp
@@ -0,0 +1,299 @@
+#include<algorithm>
+#include<cstdlib>
+#include<vector>
+
+using namespace std;
+
+#include "PosteriorSamples.h"
+
+#include "FileHeader.h"
+#include "misc.h"
+
+#include "common.h"
+   
+#define Sof(x) (long)x.size()
+#define SS second
+#define FF first
+
+#define MINUS_INF -47
+#define PLUS_INF 1e10
+
+// Reset to an empty, failed state with default normalization.
+void PosteriorSamples::clear(){//{{{
+   N=0;
+   M=0;
+   norm = 1.0;
+   failed=true;
+   transposed=true;
+   areLogged=false;
+}//}}}
+// Open (or re-open) the samples file; on failure mark the object failed.
+// Fix: corrected "PosterioSamples" typo in the error message.
+bool PosteriorSamples::open(string fileName){//{{{
+   if(samplesF.is_open())samplesF.close();
+   samplesF.open(fileName.c_str());
+   if(!samplesF.is_open()){
+      error("PosteriorSamples: File open failed: %s\n",(fileName).c_str());
+      failed=true;
+      return false;
+   }
+   return true;
+}//}}}
+// Open fileName, parse its samples header into *m (transcripts) and
+// *n (samples), and load/index the data. Returns false on any failure.
+bool PosteriorSamples::initSet(long *m,long *n, string fileName){//{{{
+   failed=false;
+   if(! open(fileName))return false;
+   
+   FileHeader fh(&samplesF);
+   if(!fh.samplesHeader(n,m,&transposed,&areLogged)){
+      error("PosteriorSamples: File header reading failed.\n");
+      failed=true;
+      return false;
+   }
+   N=*n;
+   M=*m;
+   return read();
+}//}}}
+// Prepare sample access. Transposed files are read lazily (only the offset
+// of the first transcript's line is recorded); non-transposed files are
+// loaded fully into memory, which is refused above PS_maxStoredSamples.
+// Fix: corrected "store,use trasposed" typos in the error message.
+bool PosteriorSamples::read(){//{{{
+   if(failed)return false;
+   if(transposed){
+      lines=vector<long>(M,-1);
+      lines[0]=samplesF.tellg();
+   }else{
+      if(N*M > PS_maxStoredSamples){
+         error("PosteriorSamples: Too many samples to store, use transposed file.\n");
+         failed=true;
+         return false;
+      }
+      samples.resize(M,vector<double>(N,0));
+      for(long i=0;i<N;i++)
+         for(long j=0;j<M;j++)
+            samplesF>>samples[j][i];
+      if(!samplesF.good()){
+         failed=true;
+         return false;
+      }
+   }
+   return true;
+}//}}}
+// Fill trSamples with the N samples of transcript tr, scaled by norm.
+// For transposed files the byte offset of each transcript's line is cached
+// in 'lines' and discovered lazily by skipping lines forward from the last
+// known offset. "-inf"/"nan" tokens are mapped to sentinel values.
+bool PosteriorSamples::getTranscript(long tr,vector<double> &trSamples){//{{{
+   if((tr>=M)||(failed))return false;
+   string str;
+   bool good=true;
+   if(Sof(trSamples)!=N)trSamples.resize(N);
+   if(transposed){
+      long i;
+      if(lines[tr]==-1){
+         // Find the last cached line offset, then skip forward to tr,
+         // caching offsets on the way.
+         for(i=0;lines[i+1]!=-1;i++);
+         samplesF.seekg(lines[i]);
+         while((samplesF.good())&&(i<tr)){
+            i++;
+            samplesF.ignore(10000000,'\n');
+            lines[i]=samplesF.tellg();
+         }
+      }else{
+         samplesF.seekg(lines[tr]);
+      }
+      for(i=0;(i<N)&&(samplesF.good());i++){
+         samplesF>>trSamples[i];
+         // apply normalisation.
+         trSamples[i] *= norm;
+         if(samplesF.eof())break;
+         if(samplesF.fail()){
+            // Non-numeric token: back up and re-read it as a string.
+            samplesF.clear();
+            samplesF.seekg(-1,ios::cur);
+            samplesF>>str;
+            if(ns_misc::toLower(str)=="-inf")trSamples[i]=MINUS_INF;
+            else if(ns_misc::toLower(str)=="nan")trSamples[i]=PLUS_INF;
+            else error("PosteriorSamples: Unknown value: %s in [tr:%ld,pos:%ld]\n",(str).c_str(),tr,i);
+            good=false;
+         }
+      }
+      if(i!=N){
+         good=false;
+         error("PosteriorSamples: Reading failed at position:  [tr:%ld,pos:%ld]\n",tr,i);
+      }
+   }else{
+      trSamples = samples[tr];
+      // FIXME(glausp) it is not very efficient to do this every time. 
+      // However this part only works for small data files.
+      if(norm!=1.0){
+         for(long i=0;i<N;i++)trSamples[i] *= norm;
+      }
+   }
+   return good;
+}//}}}
+// Close the file and mark the object unusable until re-initialized.
+void PosteriorSamples::close(){//{{{
+   samplesF.close();
+   failed=true;
+}//}}}
+
+
+// Constructor: no transcript mapping, no replicates, no conditions yet.
+Conditions::Conditions(){//{{{
+   mapping=false;
+   CN=0;
+   C=0;
+}//}}}
+// Returns a random index in [0,max), without checking for duplicates.
+// NOTE(review): uses rand() (modulo bias, seed owned by the caller);
+// changing the RNG here would change subsampling results.
+long Conditions::getIndex(long max){ // {{{returns index, without checking for duplicates
+   return rand() % max;
+}//}}}
+// Return the number of replicates of condition c, or -1 for an invalid c.
+// Fix: bound check was c>C, which allowed c==C and read one past the end
+// of cIndex (valid indices are 0..C-1).
+long Conditions::getRC(long c) const { //{{{
+   if(c>=C)return -1;
+   return cIndex[c].SS;
+}//}}}
+// Convenience overload: initialize without reporting the condition count.
+bool Conditions::init(string trFileName, vector<string> filesGot, long *m, long *n){//{{{
+   long c;
+   return init(trFileName,filesGot,&c,m,n);
+}//}}}
+// Initialize from a list of sample files where the literal entry "C"
+// separates conditions. Outputs: *c = condition count, *m = transcripts,
+// *n = minimum sample count over all files. When file transcript counts
+// differ, a transcript-join file (trFileName) is required to map them.
+bool Conditions::init(string trFileName, vector<string> filesGot, long *c, long *m, long *n){//{{{
+   long i,j,x,colN;
+   bool sameMs=true;
+   vector<string> files;
+   // cIndex[k] = (index of condition k's first file, its replicate count).
+   cIndex.resize(1,pair<long,long>(0,0));
+   for(i=0;i<Sof(filesGot);i++){
+      if(filesGot[i]=="C"){
+         if((cIndex.end()-1)->SS!=0){
+            cIndex.push_back(pair<long,long>(Sof(files),0));
+         }
+      }else{
+         (cIndex.end()-1)->SS++;
+         files.push_back(filesGot[i]);
+      }
+   }
+   // Drop a trailing empty condition (e.g. list ended with "C").
+   if((cIndex.end()-1)->SS==0){
+      cIndex.pop_back();
+   }
+   C = Sof(cIndex);
+   *c = C;
+   //message("File names processed.\n");
+
+   CN = Sof(files);
+   samples.resize(CN);
+   Ms.resize(CN);
+   Ns.resize(CN);
+   if(! samples[0].initSet(&Ms[0],&Ns[0],files[0])){
+      error("Conditions: file %s failed to open.\n",(files[0]).c_str());
+      return false;
+   }
+   areLogged = samples[0].logged();
+   N=Ns[0];
+   M=Ms[0];
+   for(i=1;i<CN;i++){
+      if(! samples[i].initSet(&Ms[i],&Ns[i],files[i])){
+         error("Conditions: file %s failed to open.\n",(files[i]).c_str());
+         return false;
+      }
+      if(areLogged != samples[i].logged()){
+         error("Conditions: Problem reading %s: some samples are logged and some are not.\n",(files[i]).c_str());
+         return false;
+      }
+      if(M!=Ms[i]){
+         sameMs=false;
+      }
+      // Keep the smallest sample count so every file can serve N samples.
+      if(N>Ns[i])N=Ns[i];
+   }
+   *n=N;
+
+   ifstream trFile(trFileName.c_str());
+   if(! trFile.is_open()){
+   // if there is no transcript join file, then we have to make sure that Ms are the same
+      if(sameMs){
+         M=Ms[0];
+         *m=M;
+         mapping = false;
+         return true;
+      }else{
+         error("Conditions: Different number of transcripts and missing transcript-join file\n");
+         return false;
+      }  
+   }else{
+      FileHeader fh(&trFile);
+      if((!fh.transcriptsHeader(&M,&colN))||(M==0)||(colN<CN+1)){
+         error("Conditions: Wrong transcript join descriptor file - m: %ld colN: %ld\n",M,colN);
+         return false;
+      }
+      *m=M;
+      // trMap[i][j] = index of transcript i within file j; extra columns are skipped.
+      trMap.resize(M,vector<long>(CN));
+      for(i=0;i<M;i++){
+         trFile>>x;
+         for(j=0;j<colN;j++)
+            if(j<CN)trFile >> trMap[i][j];
+            else trFile >> x;
+      }
+      trFile.close();
+      sort(trMap.begin(),trMap.end());// sort for faster disc access
+      mapping=true;
+      return true;
+   }
+   return false; // we should not get here
+}//}}}
+// Install one normalization constant per sample file; the count must match
+// the number of files (CN).
+bool Conditions::setNorm(vector<double> norms){//{{{
+   if((long)norms.size()!=CN){
+      error("Conditions: The number of normalization constants does not match number of experiments (files with samples).\n");
+      return false;
+   }
+   for(long i=0;i<CN;i++){
+      samples[i].setNorm(norms[i]);
+   }
+   return true;
+}//}}}
+// Fetch samples of transcript tr from replicate rep of condition cond.
+// Fix: bound checks were cond>C and rep>...SS, which admitted cond==C
+// (out-of-bounds read of cIndex in the check itself) and rep==SS
+// (file index belonging to the next condition). Valid ranges are
+// cond in [0,C) and rep in [0, cIndex[cond].SS).
+bool Conditions::getTranscript(long cond, long rep, long tr, vector<double> &trSamples){//{{{
+   if((cond>=C)||(rep>=cIndex[cond].SS)){
+      trSamples.clear();
+      return false;
+   }
+   return getTranscript(rep+cIndex[cond].FF, tr, trSamples);
+}//}}}
+// Fetch N samples of transcript tr from sample file cond (file index, not
+// condition index). If the file has more than N samples, N of them are
+// drawn at random (with possible repetition) via getIndex().
+// Fix: corrected "Conitions" typos in the error messages.
+// NOTE(review): the static tmpSamples buffer makes this non-reentrant.
+bool Conditions::getTranscript(long cond, long tr, vector<double> &trSamples){//{{{
+   bool status=false;
+   static vector<double> tmpSamples;
+   if(cond>=CN){
+      error("Conditions: Wrong condition request.\n");
+      return false;
+   }
+   if(tr>=M){
+      error("Conditions: Wrong transcript request.\n");
+      return false;
+   }
+   if(mapping) tr = trMap[tr][cond];
+   if(N != Ns[cond]){
+      status = samples[cond].getTranscript(tr, tmpSamples);
+      if(Sof(trSamples) != N)trSamples.resize(N);
+      for(long i=0;i<N;i++)trSamples[i] = tmpSamples[ getIndex(Ns[cond]) ];
+   }else{
+      status = samples[cond].getTranscript(tr, trSamples);
+   }
+   return status;
+}//}}}
+// Fetch exactly samplesN samples of transcript tr from sample file cond,
+// subsampling at random (with possible repetition) when the file holds more.
+// Fix: corrected "Conitions" typos and the garbled "Wrong not enough
+// samples" message.
+bool Conditions::getTranscript(long cond, long tr, vector<double> &trSamples, long samplesN){//{{{
+   bool status=false;
+   static vector<double> tmpSamples;
+   if(cond>=CN){
+      error("Conditions: Wrong condition request.\n");
+      return false;
+   }
+   if(tr>=M){
+      error("Conditions: Wrong transcript request.\n");
+      return false;
+   }
+   if(samplesN > Ns[cond]){
+      error("Conditions: Not enough samples.\n");
+      return false;
+   }
+   if(samplesN <1){
+      error("Conditions: Wrong number of samples.\n");
+      return false;
+   }
+   if(mapping)tr=trMap[tr][cond];
+   if(samplesN != Ns[cond]){
+      status = samples[cond].getTranscript(tr, tmpSamples);
+      if(Sof(trSamples) != samplesN)trSamples.resize(samplesN);
+      for(long i=0;i<samplesN;i++)
+         trSamples[i] = tmpSamples[ getIndex(Ns[cond]) ];
+   }else{
+      status = samples[cond].getTranscript(tr, trSamples);
+   }
+   return status;
+}//}}}
+// Close every sample file and forget the condition layout.
+void Conditions::close(){//{{{
+   for(long i=0;i<CN;i++){
+      samples[i].close();
+   }
+   cIndex.clear();
+}//}}}
diff --git a/PosteriorSamples.h b/PosteriorSamples.h
new file mode 100644
index 0000000..bdb6180
--- /dev/null
+++ b/PosteriorSamples.h
@@ -0,0 +1,66 @@
+#ifndef POSTERIORSAMPLES_H
+#define POSTERIORSAMPLES_H
+
+#include<vector>
+#include<fstream>
+#include<string>
+
+using namespace std;
+
+// Limit (N*M) for loading non-transposed sample files fully into memory.
+const long PS_maxStoredSamples = 100000000;
+
+// Reader for one MCMC samples file: M transcripts x N samples, optionally
+// transposed (one transcript per line) and optionally log-scale.
+class PosteriorSamples{//{{{
+   private:
+      long N,M;                       // samples per transcript, transcripts
+      double norm;                    // normalization applied to every sample
+      bool transposed,failed,areLogged;
+      ifstream samplesF;
+      vector<long> lines;             // per-transcript byte offsets (transposed mode)
+      vector<vector<double> > samples; // full data (non-transposed mode only)
+
+      bool open(string fileName);
+      bool read();
+   public:
+   PosteriorSamples() { clear(); }
+   ~PosteriorSamples() { close(); }
+   // Copy constructor and assginment. Both just create new class. For vectors only.
+   PosteriorSamples(const PosteriorSamples &other) { clear(); }
+   PosteriorSamples& operator=(const PosteriorSamples & other) { //{{{
+      close();
+      clear();
+      return *this;
+   } //}}}
+   void clear();
+   bool initSet(long *m, long *n, string fileName);
+   bool getTranscript(long tr, vector<double> &trSamples);
+   void close();
+   bool logged(){return areLogged;}
+   void setNorm(double norm){this->norm = norm;}
+};//}}}
+
+// Groups several PosteriorSamples files into conditions with replicates,
+// optionally joining transcripts across files via a transcript-join file.
+class Conditions{//{{{
+   private:
+      long M,N,CN,C;                  // transcripts, min samples, files, conditions
+      bool mapping,areLogged;
+      vector<long> Ms,Ns;             // per-file transcript/sample counts
+      vector<vector <long> > trMap;   // transcript index per file (when mapping)
+      vector<PosteriorSamples> samples;
+      vector<pair<long,long> > cIndex; // (first file index, replicate count) per condition
+      
+      long getIndex(long max); // return index without checking for duplicats
+   public:
+      Conditions();
+      void close();
+      long getRC(long c) const;
+      long getRN() const { return CN;}
+      long getC() const { return C;}
+      bool init(string trFileName, vector<string> filesGot, long *c, long *m, long *n);
+      bool init(string trFileName, vector<string> filesGot, long *m, long *n);
+      bool setNorm(vector<double> norms);
+      bool getTranscript(long cond, long rep, long tr, vector<double> &trSamples);
+      bool getTranscript(long cond, long tr, vector<double> &trSamples);
+      bool getTranscript(long cond, long tr, vector<double> &trSamples, long samplesN);
+      bool logged() const { return areLogged; }
+};//}}}
+
+#endif
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5f64e41
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+Dependencies:
+---------------------------------------
+GNU make, g++, zlib (for samtools API used in parseAlignment program)
+Optional:
+python - for helper scripts getCount.py, extractTranscriptInfo.py
+
+
+Compilation:
+--------------------------------------
+Enter BitSeq directory and run:
+make
+
+
+Help & Usage:
+--------------------------------
+See wiki for more information:
+https://github.com/BitSeq/BitSeq/wiki/Basic-usage
+
+
+Contact:
+----------------------------------
+Please use local issue tracker: https://github.com/BitSeq/BitSeq/issues for 
+help, issue resolution and comments.
+
+For direct contact, please contact Peter Glaus (glaus [at] cs.man.ac.uk).
+
+
+License:
+----------------------------------
+Artistic-2.0 
+ + Boost_1_0 for directory boost 
+ + MIT for directory samtools
+ + LGPL for directory asa103
+
diff --git a/ReadDistribution.cpp b/ReadDistribution.cpp
new file mode 100644
index 0000000..f9ea7c4
--- /dev/null
+++ b/ReadDistribution.cpp
@@ -0,0 +1,1126 @@
+#include<algorithm>
+#include<cmath>
+#ifdef _OPENMP
+#include<omp.h>
+#endif
+
+#include "ReadDistribution.h"
+
+#include "misc.h"
+#include "MyTimer.h"
+
+#include "common.h"
+
+#define DEBUG(x) 
+
+// Helpers private to ReadDistribution: base-to-integer lookup tables and
+// small BAM/CIGAR utilities.
+namespace ns_rD {
+// Base 2 Int mapping. //{{{
+vector<char> tableB2I;
+vector<int> tableB2BI;
+//}}}
+/*void inline progressLogRD(long cur,long outOf) {//{{{
+   // output progress status every 10%
+   if((outOf>10)&&(cur%((long)(outOf/10))==0)&&(cur!=0))message("# %ld done.\n",cur);
+}//}}} */
+// Populate the lookup tables once; unknown bases map to -1 (B2I) / 15 (B2BI).
+void fillTable() {//{{{
+   if(tableB2I.size()<256){
+      tableB2I.assign(256,-1);
+      tableB2I['A'] = tableB2I['a'] = 0;
+      tableB2I['C'] = tableB2I['c'] = 1;
+      tableB2I['G'] = tableB2I['g'] = 2;
+      tableB2I['T'] = tableB2I['t'] = 3;
+   }
+   if(tableB2BI.size()>=256)return;
+   tableB2BI.assign(256,15);
+   tableB2BI['A'] = tableB2BI['a'] = 1;
+   tableB2BI['C'] = tableB2BI['c'] = 2;
+   tableB2BI['G'] = tableB2BI['g'] = 4;
+   tableB2BI['T'] = tableB2BI['t'] = 8;
+}//}}}
+// Map base character to 0..3 (-1 for unknown); requires fillTable().
+inline char base2int(char B){//{{{
+   /* switch(B){
+      case 'A': case 'a': return 0; 
+      case 'C': case 'c': return 1; 
+      case 'G': case 'g': return 2; 
+      case 'T': case 't': return 3;
+      default: return -1;
+   } */
+   return tableB2I[B];
+}//}}}
+// Map base character to the BAM 4-bit encoding (15 for unknown).
+inline int base2BAMint(char B){//{{{
+   return tableB2BI[B];
+}//}}}
+// Add val to m[key], inserting the key when absent.
+template<class keyT,class valT> inline void mapAdd(map<keyT,valT> &m, keyT key, valT val){//{{{
+   if(m.count(key)==0)
+      m[key] = val;
+   else
+      m[key] += val;
+}//}}}
+// True if the alignment carries real Phred qualities (0xff marks "absent").
+inline bool readHasPhred(const bam1_t *samA){//{{{
+   if(samA->core.l_qseq < 1) return false;
+   return bam1_qual(samA)[0] != 0xff;
+}//}}}
+// Count (number of deletions) - (number of insertions). {{{
+long countDeletions(const bam1_t *samA){
+   long deletionN = 0;
+   for(long i=0;i<samA->core.n_cigar;i++){
+      switch(bam1_cigar(samA)[i]&BAM_CIGAR_MASK){
+         case BAM_CDEL:
+            deletionN += (long)(bam1_cigar(samA)[i]>>BAM_CIGAR_SHIFT);
+            break;
+         case BAM_CINS:
+            deletionN -= (long)(bam1_cigar(samA)[i]>>BAM_CIGAR_SHIFT);
+            break;
+      }
+   }
+   return deletionN;
+}//}}}
+// Decode CIGAR entry cigarI into op and length; false when out of range.
+inline bool getCigarOp(const bam1_t *samA, long cigarI, long *cigarOp, long *cigarOpCount){//{{{
+   if((cigarI<0) || (cigarI >= samA->core.n_cigar)) return false;
+   *cigarOp = bam1_cigar(samA)[cigarI]&BAM_CIGAR_MASK;
+   *cigarOpCount = (long)(bam1_cigar(samA)[cigarI]>>BAM_CIGAR_SHIFT);
+   return true;
+}//}}}
+} // namespace ns_rD
+
+using namespace ns_rD;
+
+// Constructor: defaults to single-threaded, non-uniform mode with fragment
+// length prior N(100,10). Precomputes per-Phred log hit/miss probabilities
+// (lProbMis[q] = log of error prob 10^(-q/10)) and the base lookup tables.
+ReadDistribution::ReadDistribution(){ //{{{
+   M=0;
+   uniform = lengthSet = gotExpression = normalized = validLength = false;
+   warnFirst = false;
+   warnPos = warnTIDmismatch = warnUnknownTID = noteFirstMateDown = 0;
+   procN = 1; 
+#ifdef _OPENMP
+   omp_set_num_threads(procN);
+#endif
+   lMu=100;
+   lSigma=10;
+   verbose = true;
+   singleReadLength = 0;
+   minFragLen=10000;
+   lowProbMismatches = LOW_PROB_MISSES;
+   lProbMis.resize(256,0);
+   lProbHit.resize(256,0);
+   for(long i=0; i<256; i++){
+      lProbMis[i] = - i / 10.0 * log(10.0);
+      lProbHit[i] = log1p(-exp(lProbMis[i]));
+   }
+   fillTable();
+}//}}}
+// Emit the accumulated alignment warnings (wrong strand, transcript-id
+// mismatch, unknown transcript, first-mate-downstream) and reset counters.
+void ReadDistribution::writeWarnings() {//{{{
+   if(warnPos>0){
+      warning("ReadDistribution: %ld reads from a pair did not align to the expected strand of a transcript.\n   Use --unstranded option in case the 5' and 3' mate are not expected to be from sense and anti-sense strands respectively.\n", warnPos);
+   }
+   if(warnTIDmismatch>0){
+      warning("ReadDistribution: %ld pair reads were aligned to different transcripts.\n", warnTIDmismatch);
+   }
+   if(warnUnknownTID>0){
+      warning("ReadDistribution: %ld fragments were aligned to unknown transcripts.\n", warnUnknownTID);
+   }
+   if(noteFirstMateDown){
+      message("NOTE: ReadDistribution: First mate from a pair was downstream (%ld times).\n", noteFirstMateDown);
+   }
+   warnPos = warnTIDmismatch = warnUnknownTID = noteFirstMateDown = 0;
+}//}}}
+// Set the OpenMP thread count (no-op when built without OpenMP).
+// NOTE(review): values above 32 are clamped to 4, not 32 — looks deliberate
+// (fallback for nonsense input) but worth confirming upstream.
+void ReadDistribution::setProcN(long procN){//{{{
+   if(procN<0)procN=1;
+   if(procN>32)procN=4;
+#ifdef _OPENMP
+   this->procN = procN;
+   omp_set_num_threads(procN);
+#else
+   this->procN = 1;
+#endif
+}//}}}
+// Enable printing details for the first occurrence of each warning type.
+void ReadDistribution::showFirstWarnings(){//{{{
+   warnFirst = true;
+}//}}}
+// Initialize for full (bias-aware) read-distribution estimation over m
+// transcripts. Requires transcript info and sequence; expression (trE) is
+// optional and enables expression-weighted updates. Allocates the position
+// bias matrices and the sequence-bias VLMM nodes. Returns false when a
+// required input is missing.
+bool ReadDistribution::init(long m, TranscriptInfo* trI, TranscriptSequence* trS, TranscriptExpression* trE, bool unstranded, bool verb){ //{{{
+   M = m;
+   verbose = verb;
+   if(trI==NULL){
+      error("ReadDistribution: Missing TranscriptInfo.\n");
+      return false;
+   }
+   if(trS==NULL){
+      error("ReadDistribution: Missing TranscriptSequence.\n");
+      return false;
+   }
+   uniform = false;
+   this->unstranded = unstranded;
+   trInf=trI;
+   trSeq=trS;
+   trExp=trE;
+   if(trExp) gotExpression = true;
+   else gotExpression = false;
+   lengthSet = false;
+   logLengthSum = logLengthSqSum = 0;
+   fragSeen = 0;
+   // Initialize tr - frag_length - expression maps:
+   trFragSeen5.resize(M);
+   trFragSeen3.resize(M);
+   weightNorms.resize(3,vector<map<long, double> >(M));
+   // Initialize position bias matrices:
+   posProb.resize( 6, vector<vector<double> >(trSizesN + 1, vector<double>(trNumberOfBins,0.01/trNumberOfBins)));
+   // Initialize sequence bias VLMMs: 
+   seqProb.resize(4);
+   for(long i=0;i<vlmmNodesN;i++){
+      for(long j=0;j<4;j++)
+         seqProb[j].push_back(VlmmNode(vlmmNodeDependence[i]));
+   }
+   return true;
+}//}}}
+// Initialize in uniform mode: only fragment-length statistics are tracked,
+// no positional or sequence bias. Sequence (trS) may be NULL here.
+bool ReadDistribution::initUniform(long m, TranscriptInfo* trI, TranscriptSequence* trS, bool verb){ //{{{
+   M = m;
+   verbose = verb;
+   if(trI==NULL){
+      error("ReadDistribution: Missing TranscriptInfo.\n");
+      return false;
+   }
+   trInf = trI;
+   trSeq = trS;
+   trExp = NULL;
+   uniform = true;
+   lengthSet = false;
+   gotExpression = false;
+   logLengthSum = logLengthSqSum = 0;
+   fragSeen = 0;
+   return true;
+}//}}}
+// Set the low-probability mismatch count, clamped to at least 1.
+void ReadDistribution::setLowProbMismatches(long m){//{{{
+   lowProbMismatches = m>1 ? m:1;
+}//}}}
+// Fix the fragment-length distribution to N(mu, sigma^2) instead of
+// estimating it from data, and precompute the length probabilities.
+void ReadDistribution::setLength(double mu, double sigma){ //{{{
+   lMu=mu;
+   lSigma=sigma;
+   lengthSet=true;
+   validLength=true;
+   computeLengthProb();
+}//}}}
+bool ReadDistribution::observed(fragmentP frag){ //{{{
+   DEBUG(message("%s===%s\n",bam1_qname(frag->first),bam1_qname(frag->second));)
+   long tid = frag->first->core.tid;
+   if((tid < 0)||(tid>=M)){
+      if(warnFirst && (warnUnknownTID==0))
+         warning("TID unknown: %s: %ld\n",bam1_qname(frag->first),tid);
+      warnUnknownTID++;
+      return false;
+   }
+   if((frag->paired)&&(tid!=frag->second->core.tid)){
+      if(warnFirst && (warnTIDmismatch==0))
+         warning("TID mismatch: %s: %s %s\n",bam1_qname(frag->first),
+                 trInf->trName(tid).c_str(),
+                 trInf->trName(frag->second->core.tid).c_str());
+      warnTIDmismatch++;
+      return false;
+   }
+   // Set inverse expression
+   double Iexp = (gotExpression)? 1.0/trExp->exp(tid) : 1.0;
+   // Calculate reads' true end position:
+   long frag_first_endPos, frag_second_endPos=0;
+   frag_first_endPos = bam_calend(&frag->first->core, bam1_cigar(frag->first));
+   if(frag->paired){
+      frag_second_endPos = bam_calend(&frag->second->core, bam1_cigar(frag->second));
+   }
+   // update lengths: //{{{
+   DEBUG(message("   length update\n");)
+   double len,logLen;
+   if(frag->paired){
+      fragSeen ++;
+      if(frag->second->core.pos>frag->first->core.pos)
+         len = frag_second_endPos  - frag->first->core.pos;
+      else{
+         len = frag_first_endPos - frag->second->core.pos;
+      }
+      if(minFragLen>(long)len)minFragLen = (long) len;
+      logLen = log(len);
+      logLengthSum += logLen;
+      logLengthSqSum += logLen*logLen;
+      DEBUG(if(len<=75)message("%s %ld %d %ld %d %ld\n",bam1_qname(frag->first), len, frag->first->core.pos,frag_first_endPos,frag->second->core.pos,frag_second_endPos));
+      mapAdd(fragLengths,(long)len,(long)1);
+   }else{
+      len = frag_first_endPos - frag->first->core.pos;
+      singleReadLength = (long)len;
+      if(singleReadLength<minFragLen)minFragLen = singleReadLength;
+   } //}}}
+   // Update Mismatch frequencies if no Phred. //{{{
+   if((!readHasPhred(frag->first)) || (frag->paired && !readHasPhred(frag->second))){
+      updateMismatchFreq(frag->first);
+      if(frag->paired)updateMismatchFreq(frag->second);
+   }
+   // }}}
+   // for uniform distribution ignore other estimation:
+   if(uniform) return true;
+
+   // check mates relative position: {{{
+   if((frag->paired) && (frag->first->core.pos > frag->second->core.pos)){
+      noteFirstMateDown ++;
+      bam1_t *tmp = frag->second;
+      frag->second = frag->first;
+      frag->first = tmp;
+   }
+   if((frag->paired) && (!unstranded) && 
+      ((frag->first->core.flag & BAM_FREVERSE) ||
+       (! frag->second->core.flag & BAM_FREVERSE))){
+      if(warnFirst && (warnPos==0))
+         warning("wrong strand: %s: %s\n",bam1_qname(frag->first),
+                 trInf->trName(tid).c_str());
+      warnPos ++;
+      return false;
+   }//}}}
+   // positional bias:
+   // sequence bias:
+   DEBUG(message("   positional & sequence bias\n");)
+   if(! frag->paired){
+      if(frag->first->core.flag & BAM_FREVERSE){
+         // Antisense strand of transcript is 3'end of fragment
+         updatePosBias(frag_first_endPos, readM_3, tid, Iexp);
+         // readM_5 and uniformM_5 are always "second mates" 
+         // this is assumed also in getP(...);
+         updateSeqBias(frag_first_endPos, readM_3, tid, Iexp);
+         // update sum of expression of  fragments of given length
+         mapAdd(trFragSeen3[tid], (long)len, Iexp);
+      }else{
+         // Sense strand of transcript is 5'end of fragment
+         updatePosBias( frag->first->core.pos, readM_5, tid, Iexp);
+         updateSeqBias( frag->first->core.pos, readM_5, tid, Iexp);
+         mapAdd(trFragSeen5[tid], (long)len, Iexp);
+      }
+   }else{
+      updatePosBias( frag->first->core.pos, readM_5, tid, Iexp);
+      updateSeqBias( frag->first->core.pos, readM_5, tid, Iexp);
+      mapAdd(trFragSeen5[tid], (long)len, Iexp);
+         
+      updatePosBias( frag_second_endPos, readM_3, tid, Iexp);
+      updateSeqBias( frag_second_endPos, readM_3, tid, Iexp);
+      mapAdd(trFragSeen3[tid], (long)len, Iexp);
+   }
+   return true;
+}//}}}
+void ReadDistribution::normalize(){ //{{{
+   // length distribution: {{{
+   double newMu=0, newSigma=0;
+  
+   if(fragSeen>10){
+      // Estimate mean and sigma for length distribution.
+      newMu = logLengthSum / fragSeen;
+      newSigma = sqrt(logLengthSqSum / fragSeen - newMu*newMu);
+      if(verbose)message("ReadDistribution: fragment length mu: %lg sigma: %lg\n",newMu,newSigma);
+      validLength = true;
+   }
+   if(lengthSet){
+      // check difference between estimated mean and provided mean
+      if(abs(newMu-lMu)>lSigma){
+         warning("ReadDistribution: Estimated length mean (%lg) differs too much from the one provided (%lg).\n",newMu,lMu);
+      }
+   }else{
+      // Use estimated mean and sigma;
+      lMu = newMu;
+      lSigma = newSigma;
+      if(validLength)computeLengthProb();
+   }
+   // }}}
+   // mismatch frequencies: {{{
+   double lFreqSum;
+   for(size_t i=0;i<lFreqHit.size();i++){
+      lFreqSum = log(lFreqHit[i]+lFreqMis[i]);
+      lFreqHit[i] = log(lFreqHit[i]) - lFreqSum;
+      lFreqMis[i] = log(lFreqMis[i]) - lFreqSum;
+   }
+   // }}}
+   if(uniform) return;
+   map<long,double>::iterator mIt;
+   long i,j,m,group,trLen,fragLen;
+   double Iexp,norm;
+   double binSize;
+   // set Uniform position position bias: //{{{
+   if(verbose)message("ReadDistribution: Computing uniform positional bias.\n");
+   for(m=0;m<M;m++){
+      //if(verbose)progressLogRD(m,M);
+      trLen = trInf->L(m);
+      if(trLen<trNumberOfBins)continue;
+      binSize = (double)trLen / trNumberOfBins;
+      //message(" %ld %ld %ld\n",m,trLen,trFragSeen[m].size());
+      for(group=0;group<trSizesN;group++)
+         if(trLen<trSizes[group])break;
+      // update 5' positional bias
+      for( mIt=trFragSeen5[m].begin(); mIt != trFragSeen5[m].end(); mIt++){
+         fragLen = mIt->first;
+         Iexp = mIt->second / (trLen - fragLen + 1);
+         for(i=0;i<trNumberOfBins;i++){
+            // update probability of each bin by Iexp*"effective length of current bin"
+            if((i+1) * binSize <= fragLen)continue;
+            if(i * binSize < fragLen){
+               posProb[uniformM_5][group][trNumberOfBins -1 -i] +=
+                  Iexp * ((i+1) * binSize - fragLen + 1);
+            }else{
+               posProb[uniformM_5][group][trNumberOfBins -1 -i] +=
+                  Iexp * binSize;
+            }
+         }
+      }  
+      // update 3' positional bias
+      for( mIt=trFragSeen3[m].begin(); mIt != trFragSeen3[m].end(); mIt++){
+         fragLen = mIt->first;
+         Iexp = mIt->second / (trLen - fragLen + 1);
+         for(i=0;i<trNumberOfBins;i++){
+            // update probability of each bin by Iexp*"effective length of current bin"
+            if((i+1) * binSize <= fragLen)continue;
+            if(i * binSize < fragLen){
+               posProb[uniformM_3][group][i] +=
+                  Iexp * ((i+1) * binSize - fragLen + 1);
+            }else{
+               posProb[uniformM_3][group][i] +=
+                  Iexp * binSize;
+            }
+         }
+      }  
+   }// }}}
+   // pre-compute position bias weights: {{{
+   for(j=0;j<4;j++)
+      for(group=0;group<=trSizesN;group++){
+         norm = 0;
+         for(i=0;i<trNumberOfBins;i++)norm += posProb[j][group][i];
+         for(i=0;i<trNumberOfBins;i++)posProb[j][group][i] /= norm;
+      }
+   for(group=0;group <= trSizesN;group++){
+      for(i=0;i<trNumberOfBins;i++){
+         // FIX HERE
+         posProb[weight_5][group][i] = posProb[readM_5][group][i]/posProb[uniformM_5][group][i];
+         // FIX HERE
+         posProb[weight_3][group][i] = posProb[readM_3][group][i]/posProb[uniformM_3][group][i];
+      }
+   }//}}}
+   //set Uniform sequence bias: {{{
+   if(verbose)message("ReadDistribution: Computing uniform sequence bias.\n");
+   double IexpSum5,IexpSum3;
+   map<long,double>::reverse_iterator mItR;
+   long p;
+   for(m=0;m<M;m++){
+      //if(verbose)progressLogRD(m,M);
+      trLen = trInf->L(m);
+      IexpSum5=0;
+      for(mIt=trFragSeen5[m].begin();mIt!= trFragSeen5[m].end();mIt++)
+         IexpSum5+=mIt->second / (trLen - mIt->first + 1);
+      IexpSum3=0;
+      mItR=trFragSeen5[m].rbegin();
+      mIt=trFragSeen3[m].begin();
+      // STL map iterator IS sorted by key <=> length
+      for(p=0;p<trLen;p++){
+         while((mIt!=trFragSeen3[m].end())&&(mIt->first <= p+1)){IexpSum3+=mIt->second/ (trLen - mIt->first + 1); mIt++;}
+         while((mItR!=trFragSeen5[m].rend())&&(trLen-p < mItR->first)){IexpSum5-= mItR->second / (trLen - mItR->first + 1) ; mItR++;}
+         updateSeqBias(p, uniformM_5, m, IexpSum5);
+         // 3' end is expected to be "after"
+         updateSeqBias(p+1, uniformM_3, m, IexpSum3);
+      }
+   }//}}}
+   // normalize VLMM nodes: {{{
+   for(i=0;i<vlmmNodesN;i++){
+      for(long j=0;j<4;j++)
+         seqProb[j][i].normalize();
+   }//}}} 
+}//}}}
+void ReadDistribution::logProfiles(string logFileName){//{{{
+   ofstream outF;
+   outF.open(logFileName.c_str());
+   outF.precision(6);
+   outF<<scientific;
+   if(!outF.is_open()){
+      error("ReadDistribution: Unable to open profile file: %s\n",(logFileName).c_str());
+      return;
+   }
+   long i,j,g;
+   outF<<"# BASES: (readM_5, readM_3, uniformM_5, uniformM_3)"<<endl;
+   if(!uniform){
+      for(j=0;j<4;j++){
+         outF<<"# "<<endl;
+         for(i=0;i<vlmmNodesN;i++){
+            outF<<seqProb[j][i].getPsum('A')<<" "<<seqProb[j][i].getPsum('C')<<" "<<seqProb[j][i].getPsum('G')<<" "<<seqProb[j][i].getPsum('T')<<endl;
+         }
+      }
+   }
+
+   outF<<"#\n# Position: (readM_5, readM_3, uniformM_5, uniformM_3, weight_5, weight_3)"<<endl;
+   if(!uniform){
+      for(j=0;j<6;j++){
+         outF<<"# "<<endl;
+         for(g=0;g<=trSizesN;g++){
+            for(i=0;i<trNumberOfBins;i++)
+               outF<<posProb[j][g][i]<<" ";
+            outF<<endl;
+         }
+      }
+   }
+   outF<<"# Mismatch likelihood: (probHit, probMis)"<<endl;
+   if(!lFreqHit.empty()){
+      for(i=0;i<(long)lFreqHit.size();i++)outF<<exp(lFreqHit[i])<<" ";
+      outF<<endl;
+      for(i=0;i<(long)lFreqMis.size();i++)outF<<exp(lFreqMis[i])<<" ";
+      outF<<endl;
+   }
+   outF<<"# Fragment lengths:\n";
+   if(validLength){
+      outF<<"# Distribution parameters: mu: "<<lMu<<" sigma: "<<lSigma<<endl;
+      outF<<"# Length distribution: (length, counts) L "<<fragLengths.size()<<endl;
+      for(map<long,long>::iterator it=fragLengths.begin();it!=fragLengths.end();it++)
+         outF<<it->first<<" ";
+      outF<<endl;
+      for(map<long,long>::iterator it=fragLengths.begin();it!=fragLengths.end();it++)
+         outF<<it->second<<" ";
+      outF<<endl;
+   }
+   outF.close();
+}//}}}
+void ReadDistribution::updateMismatchFreq(bam1_t *samA) {//{{{
+   if(! samA) return;
+   bam1_core_t *samC = &samA->core;
+   long i,j,k,kStart,kDir,len=samC->l_qseq;
+   // Make sure we have place for storing data.
+   if(len>(long)lFreqHit.size()){
+      lFreqHit.resize(len,1.0);
+      lFreqMis.resize(len,1.0);
+   }
+   // Set direction for storing mismatches depending on read orientation.
+   if(samC->flag & BAM_FREVERSE){
+      kStart = len - 1;
+      kDir = -1;
+   }else{
+      kStart = 0;
+      kDir = +1;
+   }
+   long deletionN = countDeletions(samA);
+   string seq = trSeq->getSeq(samC->tid, samC->pos, len+deletionN, false);
+   long cigarOp,cigarI,cigarOpCount;
+   cigarOp=cigarI=cigarOpCount=0;
+   // i - iterates within reference sequence
+   // j - iterates within read
+   // k - iterates within frequency arrays, can be reversed
+   for(i=j=0,k=kStart;(i<len+deletionN) && (j<len);){
+      if(cigarOpCount == 0){
+         if(! getCigarOp(samA, cigarI, &cigarOp, &cigarOpCount))break;
+         cigarI++;
+      }
+      switch(cigarOp){
+         case BAM_CDEL: i+=cigarOpCount; cigarOpCount=0; continue;
+         case BAM_CINS:
+            j+= cigarOpCount; 
+            k+= kDir * cigarOpCount;
+            cigarOpCount=0; 
+            continue;
+      }
+      if(base2int(seq[i]) > -1){
+         if(base2BAMint(seq[i]) != bam1_seqi(bam1_seq(samA),j))lFreqMis[k]+=1;
+         else lFreqHit[k]+=1;
+      }
+      i++;
+      j++;
+      k+=kDir;
+      cigarOpCount --;
+   }
+}//}}}
+pair<double,double> ReadDistribution::getSequenceLProb(bam1_t *samA) const{//{{{
+   if(! samA) return pair<double, double>(0,0);
+   double lProb=0,lowLProb=0, lPHit, lPMis;
+   bam1_core_t *samC = &samA->core;
+   uint8_t *qualP=bam1_qual(samA);
+   bool hasPhred = readHasPhred(samA);
+   long i,j,k,len=samC->l_qseq;
+   long deletionN = countDeletions(samA);
+   string seq = trSeq->getSeq(samC->tid, samC->pos, len+deletionN, false);
+   long hitC, misC, addMisC;
+   long cigarOp,cigarI,cigarOpCount;
+   bool reversed = (samC->flag & BAM_FREVERSE);
+
+   // First count the number fo misses to add for low probability. {{{
+   cigarOp = cigarI = cigarOpCount = 0;
+   hitC = misC = 0;
+   // i - iterates within reference sequence
+   // j - iterates within read
+   for(i=j=0;(i<len+deletionN) && (j<len);){
+      if(cigarOpCount == 0){
+         if(! getCigarOp(samA, cigarI, &cigarOp, &cigarOpCount))break;
+         cigarI++;
+      }
+      switch(cigarOp){
+         case BAM_CDEL: i+=cigarOpCount; cigarOpCount=0; continue;
+         case BAM_CINS: j+=cigarOpCount; cigarOpCount=0; continue;
+      }
+      if((base2int(seq[i]) == -1)||
+         (base2BAMint(seq[i]) != bam1_seqi(bam1_seq(samA),j)))misC++;
+      else hitC++;
+      i++;
+      j++;
+      cigarOpCount --;
+   }
+   addMisC = max((long)1, lowProbMismatches - misC);
+   // }}}
+   
+   cigarOp = cigarI = cigarOpCount = 0;
+   for(i=j=0;(i<len+deletionN) && (j<len);){
+      if(cigarOpCount == 0){
+         if(! getCigarOp(samA, cigarI, &cigarOp, &cigarOpCount))break;
+         cigarI++;
+      }
+      switch(cigarOp){
+         case BAM_CDEL: i+=cigarOpCount; cigarOpCount=0; continue;
+         case BAM_CINS: j+=cigarOpCount; cigarOpCount=0; continue;
+         /*case BAM_CMATCH:
+         case BAM_CEQUAL:
+         case BAM_CDIFF:*/
+      }
+      if(hasPhred){
+         lPHit = lProbHit[qualP[j]];
+         lPMis = lProbMis[qualP[j]];
+      }else{
+         if(!reversed)k = j;
+         else k = len-j-1;
+         if((k>=0)&&(k<(long)lFreqHit.size())){
+            lPHit = lFreqHit[k];
+            lPMis = lFreqMis[k];
+         }else{
+            lPHit = lPMis = 0.5;
+         }
+      }
+      if((base2int(seq[i]) == -1) ||
+         (base2BAMint(seq[i]) != bam1_seqi(bam1_seq(samA),j))){
+         // If bases don't match, multiply probability by probability of error.
+         lProb += lPMis;
+         lowLProb += lPMis;
+      }else{
+         lProb += lPHit;
+         hitC --;
+         if((addMisC>0) && (reversed || (addMisC>hitC))){
+            // If there are some misses left add a 'miss' to the 'low probability'.
+            lowLProb += lPMis;
+            addMisC--;
+         }else{
+            lowLProb += lPHit;
+         }
+      }
+      i++;
+      j++;
+      cigarOpCount --;
+   }
+   return pair<double, double>(lProb,lowLProb);
+}//}}}
+bool ReadDistribution::getP(fragmentP frag,double &lProb,double &lProbNoise){ //{{{
+   lProb = ns_misc::LOG_ZERO;
+   lProbNoise = ns_misc::LOG_ZERO;
+   long tid = frag->first->core.tid;
+   long trLen = trInf->L(tid),len;
+   // Check transcript IDs {{{
+   if((tid < 0)||(tid>=M)){
+      if(warnFirst && (warnUnknownTID==0))
+         warning("TID unknown: %s: %ld\n",bam1_qname(frag->first),tid);
+      warnUnknownTID++;
+      return false;
+   }
+   if((frag->paired)&&(tid!=frag->second->core.tid)){
+      if(warnFirst && (warnTIDmismatch==0))
+         warning("TID mismatch: %s: %s %s\n",bam1_qname(frag->first),
+                 trInf->trName(tid).c_str(),
+                 trInf->trName(frag->second->core.tid).c_str());
+      warnTIDmismatch++;
+      return false;
+   }
+   //}}}
+   double lP = 0;
+   // Get probability based on base mismatches: {{{
+   pair<double, double> lpSeq1(0,0),lpSeq2(0,0);
+   lpSeq1 = getSequenceLProb(frag->first);
+   if(frag->paired)lpSeq2 = getSequenceLProb(frag->second);
+   // }}}
+   // Calculate reads' true end position: {{{
+   long frag_first_endPos, frag_second_endPos=0;
+   frag_first_endPos = bam_calend(&frag->first->core, bam1_cigar(frag->first));
+   if(frag->paired){
+      frag_second_endPos = bam_calend(&frag->second->core, bam1_cigar(frag->second));
+   }
+   // }}}
+   if(frag->paired){
+   // Get probability of length {{{
+      if(frag->second->core.pos > frag->first->core.pos)
+         len = frag_second_endPos - frag->first->core.pos;
+      else{
+         len = frag_first_endPos - frag->second->core.pos;
+      }
+      // compute length probability and normalize by probability of all possible lengths (cdf):
+      // P*=lengthP/lengthNorm
+      // }}}
+      if(validLength) lP += getLengthLP(len) - getLengthLNorm(trLen);
+   }else{
+      len = frag_first_endPos - frag->first->core.pos;
+   }
+   if(uniform){
+      // Get probability of position for uniform distribution
+      // P*=1/(trLen-len+1)
+      lP -= log(trLen - len + 1.0);
+   }else{ // Positional & Sequence bias {{{
+      // Get probability of position given read bias model
+      // check mates' relative position:
+      if( frag->paired && (frag->first->core.pos > frag->second->core.pos)){
+         noteFirstMateDown ++;
+         bam1_t *tmp = frag->second;
+         frag->second = frag->first;
+         frag->first = tmp;
+      }
+      if(!frag->paired){
+         if(frag->first->core.flag & BAM_FREVERSE){
+            // If read was reverse complement, then it's 3' mate.
+            // P*=posBias3'*seqBias3'/weightNorm3'
+            lP += log(getPosBias(frag->first->core.pos, frag_first_endPos, 
+                                 mate_3, trLen)) +
+               log(getSeqBias(frag_first_endPos , mate_3, tid )) -
+               log(getWeightNorm( (long) len, mate_3, tid));
+         }else{
+            // P*=posBias5'*seqBias5'/weightNorm5'
+            lP += log(getPosBias(frag->first->core.pos, frag_first_endPos,
+                                 mate_5, trLen)) +
+               log(getSeqBias(frag->first->core.pos, mate_5, tid )) -
+               log(getWeightNorm( (long) len, mate_5, tid));
+         }
+      }else{
+         // check strand of the reads:
+         if((!unstranded) && 
+            ((frag->first->core.flag & BAM_FREVERSE) ||
+            (! frag->second->core.flag & BAM_FREVERSE))){
+               if(warnFirst && (warnPos==0))
+                  warning("wrong strand: %s: %s\n",bam1_qname(frag->first),
+                          trInf->trName(tid).c_str());
+               warnPos ++;
+               return false;
+         }
+//#pragma omp parallel sections num_threads (2) reduction(*:P)
+//{
+//   #pragma omp section
+         // P*=1/weightNormFull
+         lP -= log(getWeightNorm( (long) len, FullPair, tid));
+//   #pragma omp section
+//   {
+         // P*=posBias5'*posBias3'*seqBias5'*seqBias3'
+         lP += log(getPosBias(frag->first->core.pos, frag_second_endPos,
+                              FullPair, trLen))
+          + log(getSeqBias(frag->first->core.pos, mate_5, tid ))
+          + log(getSeqBias(frag_second_endPos , mate_3, tid )); 
+//   }
+//}
+      }
+   } //}}}
+   lProb = lP + lpSeq1.first+lpSeq2.first;
+   lProbNoise = lP + lpSeq1.second+lpSeq2.second;
+   return true;
+}//}}}
+void ReadDistribution::updatePosBias(long pos, biasT bias, long tid, double Iexp){ //{{{
+   if(bias == readM_3)pos--;
+   long group, rel, trLen;
+   trLen = trInf->L(tid);
+   // transcript too short:
+   if(trLen < trNumberOfBins) return;
+   // choose group:
+   for(group = 0;group < trSizesN;group++)
+      if(trLen<trSizes[group])break;
+   // find relative position:
+   rel = (pos * trNumberOfBins) / trLen;
+   if(rel>=trNumberOfBins)rel=trNumberOfBins-1;
+   //add inverse expression:
+   posProb[bias][ group ][ rel ] += Iexp;
+}//}}}
+void ReadDistribution::updateSeqBias(long pos, biasT bias, long tid, double Iexp){ //{{{
+   if(Iexp<=0)return;
+   if(bias>3)return; //this should not happen
+   long start ;
+   string seq;
+   // Set correct start based on orientation.
+   if((bias == readM_5)||(bias == uniformM_5)){
+      start = pos - vlmmStartOffset - MAX_NODE_PAR;
+      seq = trSeq->getSeq(tid, start, vlmmNodesN + MAX_NODE_PAR);
+   }else{
+      start = pos + vlmmStartOffset - vlmmNodesN ;
+      // Get don't need complementing as it is always complement.
+      seq = trSeq->getSeq(tid, start, vlmmNodesN + MAX_NODE_PAR);
+      // Only reverse the sequence.
+      reverse(seq.begin(),seq.end());
+   }
+   // Update bias weights.
+   for(long i=0;i<vlmmNodesN;i++){
+      seqProb[bias][i].update( Iexp, seq[i+2], seq[i+1], seq[i]);
+   }
+}//}}}
+double ReadDistribution::getPosBias(long start, long end, readT read, long trLen) const { //{{{
+   end --;
+   // transcript too short:
+   if(trLen < trNumberOfBins) return 1;
+   long group, relS, relE;
+   // choose group:
+   for(group = 0;group < trSizesN;group++)
+      if(trLen<trSizes[group])break;
+   // find relative positions:
+   relS = (start * trNumberOfBins) / trLen;
+   if(relS>=trNumberOfBins)relS=trNumberOfBins-1;
+   relE = (end * trNumberOfBins) / trLen;
+   if(relE>=trNumberOfBins)relE=trNumberOfBins-1;
+   double posBias = 1;
+   // return bias weight
+   if((read == FullPair) || (read == mate_5))
+      posBias *= posProb[ weight_5 ][ group ][ relS ];
+   if((read == FullPair) || (read == mate_3))
+      posBias *= posProb[ weight_3 ][ group ][ relE ];
+   return posBias;
+}//}}}
+double ReadDistribution::getSeqBias(long pos, readT read, long tid) const{ //{{{
+   if(read==FullPair)return 0; // this should never happen
+   long start;
+   biasT bias,biasNorm;
+   // Get sequence based on which fragment end we are dealing with.
+   if(read == mate_5){
+      start = pos - vlmmStartOffset - MAX_NODE_PAR;
+   }else{
+      start = pos + vlmmStartOffset - vlmmNodesN;
+   }
+   string seq = trSeq->getSeq(tid, start, vlmmNodesN + MAX_NODE_PAR);
+   if(read == mate_5){
+      bias = readM_5;
+      biasNorm = uniformM_5;
+   }else{
+      bias = readM_3;
+      biasNorm = uniformM_3;
+      // Reverse the sequence for 3' end.
+      reverse(seq.begin(),seq.end());
+   }
+   double B = 1;
+   for(long i=0;i<vlmmNodesN;i++)
+      // FIX HERE (probably that we are always doing 'same' division)
+      B *= seqProb[bias][i].getP( seq[i+2], seq[i+1], seq[i]) /
+           seqProb[biasNorm][i].getP( seq[i+2], seq[i+1], seq[i]);
+   return B;
+}//}}}
+inline char ReadDistribution::getBase(long pos, const string &fSeq) const{ //{{{
+   if((pos<0)||(pos>=(long)fSeq.size()))return 'N';
+   return fSeq[pos];
+}//}}}
+double ReadDistribution::getSeqBias(long start, long end, readT read, const string &fSeq) const{ //{{{
+   start = start - vlmmStartOffset - MAX_NODE_PAR;
+   end = end + vlmmStartOffset + MAX_NODE_PAR - 1;
+   
+   double B = 1;
+   long i,j;
+   if((read==FullPair) || (read == mate_5)){
+      for(i=0,j=start; i<vlmmNodesN; i++, j++)
+         // FIX HERE (probably that we are always doing 'same' division)
+         B *= seqProb[readM_5][i].getP( getBase(j+2,fSeq), getBase(j+1,fSeq), getBase(j,fSeq)) /
+              seqProb[uniformM_5][i].getP( getBase(j+2,fSeq), getBase(j+1,fSeq), getBase(j,fSeq));
+   }
+   if((read==FullPair) || (read == mate_3)){
+      // For 3' bias we go from 'end' position backwards.
+      for(i=0,j=end; i<vlmmNodesN; i++, j--)
+         // FIX HERE (probably that we are always doing 'same' division)
+         B *= seqProb[readM_3][i].getP( getBase(j-2,fSeq), getBase(j-1,fSeq), getBase(j,fSeq)) /
+              seqProb[uniformM_3][i].getP( getBase(j-2,fSeq), getBase(j-1,fSeq), getBase(j,fSeq));
+   }
+   return B;
+}//}}}
+/* inline char ReadDistribution::complementBase(char base) const{ //{{{
+   if((base=='A')||(base=='a')) return'T';
+   if((base=='T')||(base=='t')) return 'A';
+   if((base=='C')||(base=='c')) return 'G';
+   if((base=='G')||(base=='g')) return 'C';
+   return 'N';
+}//}}} */
+double ReadDistribution::getWeightNorm(long len, readT read, long tid){ //{{{
+   if(len == 0)return 1;
+   if(weightNorms[read][tid].count(len) == 0){
+      const string &trS = trSeq->getTr(tid);
+      // We are not complementing.
+      //for(size_t i=0;i<trRS.size();i++)trRS[i] = complementBase(trRS[i]);
+      long trLen = trInf->L(tid), pos;
+      double norm = 0,w;
+      #pragma omp parallel for \
+         private(w) \
+         reduction(+:norm)
+      for(pos = 0;pos <= trLen-len;pos++){
+         w = getPosBias(pos, pos + len, read, trLen) *
+             getSeqBias(pos, pos + len, read, trS);
+         norm+=w;
+      }
+      weightNorms[read][tid][len] = norm;
+//      message("w: %ld %ld %ld  %ld%lf\n",read,tid,len,trLen<"   ",norm);
+      return norm;
+   }
+   return weightNorms[read][tid][len];
+}//}}}
+long ReadDistribution::getWeightNormCount() const{//{{{
+   long length_sum=0;
+   for(size_t i=0;i<weightNorms.size();i++)
+      for(size_t j=0;j<weightNorms[i].size();j++)
+         length_sum+=weightNorms[i][j].size();
+   return length_sum;
+}//}}}
+double ReadDistribution::getLengthLP(long len) const{//{{{
+   if(len>=(double)lLengthP.size())return computeLengthLP(len);
+   return lLengthP[len];
+}//}}}
+double ReadDistribution::computeLengthLP(double len) const{//{{{
+   //return 1./(len*lSigma*sqrt_2_pi)*exp(-pow(log(len) - lMu, (double)2.0)/(2 * pow(lSigma, (double)2)));
+   if(len == 0)return ns_misc::LOG_ZERO;
+   const double log_sqrt_2_pi = .918938533192; // log(sqrt(2*pi))
+   const double lLen = log(len);
+   return - (lLen +
+             log(lSigma) + 
+             log_sqrt_2_pi + 
+             pow( (lLen - lMu) / lSigma, 2.0) / 2.0 );
+}//}}}
+double ReadDistribution::getLengthLNorm(long trLen) const{//{{{
+   if(trLen<(double)lLengthNorm.size())return lLengthNorm[trLen];
+
+   // erfc needs compiler with C99 standard 
+   // other option might be to use boost/math/special_functions/erf.hpp
+   const long double sqrt_2 = 1.41421356237309;
+   long double CDF2 = erfcl((lMu-log((long double)trLen)) / (lSigma * sqrt_2));
+   if(CDF2 == 0)return log(0.5)+ns_misc::LOG_ZERO;
+   return (double)(log(0.5)+log(CDF2));
+}//}}}
+void ReadDistribution::computeLengthProb() {//{{{
+   MyTimer timer;
+   if(verbose){
+      message("Pre-computing length probabilities. ");
+      timer.start();
+   }
+   long max=0;
+   if(trInf){
+      for(long i=0;i<M;i++)if(trInf->L(i)>max)max=trInf->L(i);
+      max = min(max,(long)150000);
+   }else{
+      max = 100000;
+   }
+   lLengthP.assign(max+1,ns_misc::LOG_ZERO);
+   lLengthNorm.assign(max+1,ns_misc::LOG_ZERO);
+   bool normIsOne = false;
+   for(long i=1;i<=max;i++){
+      if(normIsOne){
+         // lP is LOG_ZERO already, set norm to log(1).
+         lLengthNorm[i] = 0;
+         continue;
+      }
+      lLengthP[i] = computeLengthLP(i);
+      lLengthNorm[i] = ns_math::logAddExp(lLengthNorm[i-1],lLengthP[i]);
+      if(lLengthNorm[i] > -1e-15){
+         normIsOne=true;
+      }
+   }
+   if(verbose)timer.current();
+}//}}}
+vector<double> ReadDistribution::getEffectiveLengths(){ //{{{
+   vector<double> effL(M,0);
+   long m,len,trLen,pos;
+   double eL, lCdfNorm,lenP, wNorm;
+   string trRS;
+   // Make one caching array for each process.
+   vector<vector<double> > posBias5All(procN),posBias3All(procN);
+   MyTimer timer;
+   timer.start();
+   DEBUG(message("Eff length: validLength %d ; minFragLen: %ld.\n",(int)validLength,minFragLen));
+   #pragma omp parallel for \
+      schedule (dynamic,5) \
+      private (len,trLen,pos,eL,lenP,wNorm,lCdfNorm,trRS)
+   for(m=0;m<M;m++){
+      if(verbose && (m!=0) && (M>20) && (m%(M/10)==0)){
+         #pragma omp critical
+         {
+            message("# %ld done. ",m);
+            timer.current();
+         }
+      }
+      long threadID = 0;
+#ifdef _OPENMP
+      threadID = omp_get_thread_num();
+#endif
+      trLen = trInf->L(m);
+      if(!validLength){
+         if(trLen>singleReadLength*2) effL[m] = trLen - singleReadLength; 
+         else if(trLen>singleReadLength) effL[m] = singleReadLength;
+         else effL[m] = trLen;
+         continue;
+      }
+      lCdfNorm = getLengthLNorm(trLen);
+// always computing the effective length using fragLen only
+      if(uniform){
+         eL = 0;
+         for(len=1;len<=trLen;len++){
+            eL += exp(getLengthLP(len)-lCdfNorm) * (trLen-len);
+         }
+         // dont go below minimal fragment length
+         effL[m] = eL>minFragLen?eL:trLen;
+      }else{
+         DEBUG(message("Copy sequence.\n"));
+         const string &trS = trSeq->getTr(m);
+         vector<double> &posBias5 = posBias5All[threadID];
+         vector<double> &posBias3 = posBias3All[threadID];
+         posBias5.resize(trLen);
+         posBias3.resize(trLen);
+         DEBUG(message("Precomputing posBias.\n"));
+         for(pos = 0;pos<trLen;pos++){
+            // Don't care about end position.
+            posBias5[pos] = getPosBias(pos, trLen, mate_5, trLen)*
+                            getSeqBias(pos, trLen, mate_5, trS);
+            // Don't care about start position.
+            posBias3[pos] = getPosBias(0, pos+1, mate_3, trLen)*
+                            getSeqBias(0, pos+1, mate_3, trS);
+         }
+         eL=0;
+         DEBUG(message("Computing norms.\n"));
+         for(len=1;len<=trLen;len++){
+            wNorm = 0;
+            for(pos=0;pos <= trLen - len;pos++){
+               wNorm += posBias5[pos] * posBias3[pos+len-1];
+            }
+            lenP = exp(getLengthLP( len ) - lCdfNorm);
+            eL += lenP * wNorm;
+         }
+         // Check for weirdness and don't go below 0 (some transcripts already had 5 bases).
+         // Function isnormal assumes C99 or C++11.
+         if((!isnormal(eL)) || (eL <= 1)){
+            effL[m] = trLen;
+            DEBUG(message("weird: %lf %ld %ld\n",eL,trLen,m));
+         }else{
+            effL[m] = eL;
+         }
+      }
+   }
+   DEBUG(long same = 0);
+   if(! uniform){
+      // normalize effective length to same sum as original length
+      double effSum=0,lSum=0;
+      for(m=0;m<M;m++){
+         DEBUG(if(effL[m] == trInf->L(m))same++);
+         lSum+=trInf->L(m);
+         effSum+=effL[m];
+      }
+      for(m=0;m<M;m++)effL[m] *= lSum/effSum;
+   }
+   DEBUG(message(" same: %ld.\n",same));
+   for(m=0;m<M;m++)if(effL[m]<=0) effL[m]=trInf->L(m);
+   return effL;
+}//}}}
+
+double VlmmNode::getPsum(char b) const{//{{{
+   if(base2int(b) == -1) return 1/4;
+   if(parentsN == 2)return getP(b,'N','N')*16;
+   if(parentsN == 1)return getP(b,'N','N')*4;
+   return probs[base2int(b)];
+}//}}}
+VlmmNode::VlmmNode(long p) {//{{{
+   setParentsN(p);
+}//}}}
+void VlmmNode::setParentsN(long p) {//{{{
+   parentsN = p;
+   if(parentsN>2){
+      warning("VlmmNode: Code not read for using more than 2 parents.\n");
+      parentsN = 2;
+   }
+   // initialize probability matrix, set pseudocount:
+   probs.assign(pows4[parentsN+1], 0.01/pows4[parentsN+1]);
+}//}}}
+void VlmmNode::update(double Iexp, char b, char bp, char bpp) {//{{{
+   double expDiv = 1.0;
+   if(base2int(b) == -1)expDiv *=4.0;
+   if((parentsN>0)&&(base2int(bp) == -1))expDiv *=4.0;
+   if((parentsN>1)&&(base2int(bpp) == -1))expDiv *=4.0;
+   if(expDiv == 1){
+      // All bases are known:
+      long i=0;
+      switch(parentsN){
+         case 2: 
+            i += pows4[2]*base2int(bpp);
+         case 1: 
+            i += pows4[1]*base2int(bp);
+         default: 
+            i += base2int(b);
+      }
+      probs[ i ] += Iexp;
+   }else{
+      long i=0,j=0,k=0;
+      Iexp /= expDiv;
+      if(parentsN==2){
+         for(i=0;i<4;i++)
+            if((base2int(bpp) == i) || (base2int(bpp) == -1))
+               for(j=0;j<4;j++)
+                  if((base2int(bp) == j) || (base2int(bp) == -1))
+                     for(k=0;k<4;k++)
+                        if((base2int(b) == k) || (base2int(b) == -1))
+                           probs[pows4[2]*i + pows4[1]*j+ k]+=Iexp;
+      }else if(parentsN==1){
+         for(j=0;j<4;j++)
+            if((base2int(bp) == j) || (base2int(bp) == -1))
+               for(k=0;k<4;k++)
+                  if((base2int(b) == k) || (base2int(b) == -1))
+                     probs[pows4[1]*j+ k]+=Iexp;
+      }else{
+         for(k=0;k<4;k++)
+            // if((base2int(b) == k) || (base2int(b) == -1)); WE KNOW THAT b == 'N'
+               probs[k]+=Iexp;
+      }
+   }
+}//}}}
+void VlmmNode::normalize() {//{{{
+   double sum=0;
+   long i,j,k,index;
+   if(parentsN == 2){
+      for(k=0;k<4;k++)
+         for(j=0;j<4;j++){
+            index = pows4[2]*k + pows4[1]*j;
+            sum = 0;
+            for(i=0;i<4;i++)sum += probs[i + index];
+            for(i=0;i<4;i++)probs[i + index] /= sum;
+         }
+   }else if(parentsN == 1){
+      for(j=0;j<4;j++){
+         index = pows4[1]*j;
+         sum = 0;
+         for(i=0;i<4;i++)sum += probs[i + index];
+         for(i=0;i<4;i++)probs[i + index] /= sum;
+      }
+   }else{
+      sum = 0;
+      for(i=0;i<pows4[parentsN+1];i++)sum += probs[i];
+      for(i=0;i<pows4[parentsN+1];i++)probs[i] /= sum;
+   }
+}//}}}
+double VlmmNode::getP(char b, char bp, char bpp) const{//{{{
+   if(base2int(b) == -1)return 1.0/4.0;
+   double probDiv = 1.0;
+   if((parentsN>0)&&(base2int(bp) == -1))probDiv *=4.0;
+   if((parentsN>1)&&(base2int(bpp) == -1))probDiv *=4.0;
+   if(probDiv == 1.0){
+      // All bases are known:
+      long i=0;
+      switch(parentsN){
+         case 2: 
+            i += pows4[2]*base2int(bpp);
+         case 1: 
+            i += pows4[1]*base2int(bp);
+         default: 
+            i += base2int(b);
+      }
+      return probs[ i ];
+   }else{
+      long i=0,j=0,k=0;
+      double prob = 0;
+      // either one ore both parents are unknown==undefined
+      if(parentsN==2){
+         k = base2int(b);
+         for(i=0;i<4;i++)
+            if((base2int(bpp) == i) || (base2int(bpp) == -1))
+               for(j=0;j<4;j++)
+                  if((base2int(bp) == j) || (base2int(bp) == -1))
+                     prob += probs[pows4[2]*i + pows4[1]*j+ k];
+      }else if(parentsN==1){
+         // there was an unknown => we know that parent is unknown
+         k = base2int(b);
+         for(j=0;j<4;j++)
+            prob += probs[pows4[1]*j+ k];
+      }else ;// Covered by all bases unknown;
+      return prob / probDiv;
+   }
+}//}}}
+
diff --git a/ReadDistribution.h b/ReadDistribution.h
new file mode 100644
index 0000000..6a78fa9
--- /dev/null
+++ b/ReadDistribution.h
@@ -0,0 +1,134 @@
+#ifndef READDISTRIBUTION_H
+#define READDISTRIBUTION_H
+
+#include<vector>
+#include<map>
+
+using namespace std;
+
+#include "TranscriptInfo.h"
+#include "TranscriptExpression.h"
+#include "TranscriptSequence.h"
+
+#include "samtools/bam.h"
+#include "samtools/sam.h"
+
+namespace ns_rD {
+
+// Defaults: {{{
+// Tunable constants of the read-distribution model.
+const char LOW_PROB_MISSES  = 6;
+const char MAX_NODE_PAR = 2;
+// Transcript-length category boundaries -- presumably used to pick a
+// position-bias profile by transcript size; TODO confirm in ReadDistribution.cpp.
+const long trSizes [] = { 1334,2104,2977,4389};
+const char trSizesN = 4;
+const char trNumberOfBins = 20;
+// Number of parents (0..MAX_NODE_PAR) for each node of the VLMM sequence model.
+const char vlmmNodeDependence [] = { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 0, 0};
+const char vlmmNodesN = 21;
+const char vlmmStartOffset = 8;
+// Lookup table of powers of four (4^0 .. 4^6) for VLMM probability indexing.
+const long pows4 [] = {1,4,16,64,256,1024,4096};
+//}}}
+
+// A (possibly paired) alignment record owning two samtools bam1_t buffers.
+struct fragmentT{//{{{
+   bam1_t *first,*second;
+   bool paired;
+   fragmentT(){
+      first = bam_init1();
+      second = bam_init1();
+      paired = true;
+   }
+   ~fragmentT(){
+      bam_destroy1(first);
+      bam_destroy1(second);
+   }
+   // Deep-copy another fragment's data into the already-allocated buffers.
+   void copyFragment(const fragmentT *sourceF){
+      paired = sourceF->paired;
+      bam_copy1(first, sourceF->first);
+      bam_copy1(second, sourceF->second);
+   }
+   // NOTE(review): no copy constructor/assignment is declared; copying a
+   // fragmentT by value would double-free the buffers. Use copyFragment.
+};
+
+typedef fragmentT *fragmentP;
+//}}}
+
+// One node of the variable-length Markov model used for sequence bias:
+// a conditional distribution over the 4 bases given up to 2 parent bases.
+class VlmmNode{//{{{
+   private:
+      // Number of conditioning parents (0, 1 or 2).
+      long parentsN;
+      // Flattened conditional probability table, indexed via pows4.
+      vector<double> probs;
+
+   public:
+      VlmmNode(){parentsN = 0;}
+      VlmmNode(long p);
+      void setParentsN(long p);
+      // Add weight Iexp to the entry for base b given parents bp, bpp.
+      void update(double Iexp, char b, char bp, char bpp);
+      // Normalize each conditional distribution to sum to 1.
+      void normalize();
+      // Probability of b given parents; unknown bases are marginalized.
+      double getP(char b, char bp, char bpp) const;
+      double getPsum(char b) const;
+};//}}}
+
+// Selects which bias profile an update/query targets: read-derived vs
+// uniform background, separately for the 5' and 3' end; weight_* presumably
+// the combined weights -- confirm against ReadDistribution.cpp usage.
+enum biasT { readM_5, readM_3, uniformM_5, uniformM_3, weight_5, weight_3};
+// Which mate (or the full pair) a computation concerns.
+enum readT { mate_5, mate_3, FullPair };
+
+} // namespace ns_rD
+
+// Models the probability of observing a fragment given a transcript:
+// fragment-length distribution, position bias, sequence (VLMM) bias and
+// base-call mismatch probabilities. Trained via observed(), then
+// normalize()d before querying with getP().
+class ReadDistribution{
+   private:
+      long procN,M,fragSeen,singleReadLength,minFragLen;
+      double lMu,lSigma,logLengthSum,logLengthSqSum;
+      long lowProbMismatches;
+      bool verbose,warnFirst,uniform,unstranded,lengthSet,gotExpression,normalized;
+      bool validLength;
+      long warnPos, warnTIDmismatch, warnUnknownTID, noteFirstMateDown;
+      TranscriptInfo* trInf;
+      TranscriptSequence* trSeq;
+      TranscriptExpression* trExp;
+      // for each transcript, remember seen fragments in map: length->(sum of probs)
+      vector<map<long,double> > trFragSeen5,trFragSeen3;
+      // cache for already computed weight norms for:
+      //    (single reads 5',3', Pair) x Transcript x Length
+      vector<vector<map<long, double> > > weightNorms;
+      // position probability arrays (RE-FACTOR to array of 4 vectors)
+      vector<vector<vector<double> > > posProb;
+      vector<vector<ns_rD::VlmmNode> > seqProb;
+      // Cache probabilities for Phred score.
+      vector<double> lProbMis;
+      vector<double> lProbHit;
+      // Mismatch likelihoods along read.
+      vector<double> lFreqHit;
+      vector<double> lFreqMis;
+      // Cache length probabilities.
+      vector<double> lLengthP,lLengthNorm;
+      map<long,long> fragLengths;
+   
+      double getLengthLP(long len) const;
+      double computeLengthLP(double len) const;
+      double getLengthLNorm(long trLen) const;
+      void computeLengthProb();
+      void updateMismatchFreq(bam1_t *samA);
+      void updatePosBias(long pos, ns_rD::biasT bias, long tid, double Iexp);
+      void updateSeqBias(long pos, ns_rD::biasT bias, long tid, double Iexp);
+      double getPosBias(long start, long end, ns_rD::readT read,
+                        long trLen) const;
+      double getSeqBias(long pos, ns_rD::readT read, long tid) const;
+      inline char getBase(long pos, const string &fSeq) const;
+      double getSeqBias(long start, long end, ns_rD::readT read,
+                        const string &fSeq) const;
+      //inline char complementBase(char base) const;
+      double getWeightNorm(long len, ns_rD::readT read, long tid);
+      pair<double, double> getSequenceLProb(bam1_t *samA) const;
+   public:
+      ReadDistribution();
+      void setProcN(long procN);
+      void showFirstWarnings();
+      void writeWarnings();
+      bool init(long m, TranscriptInfo* trI, TranscriptSequence* trS, TranscriptExpression* trE, bool unstranded, bool verb = true);
+      bool initUniform(long m, TranscriptInfo* trI, TranscriptSequence* trS, bool verb = true);
+      void setLowProbMismatches(long m);
+      void setLength(double mu, double sigma);
+      // Record one observed fragment (updates bias/length statistics).
+      bool observed(ns_rD::fragmentP frag);
+      void normalize();
+      void logProfiles(string logFileName = "");
+      // Compute alignment probability and noise probability for a fragment.
+      bool getP(ns_rD::fragmentP frag,double &prob,double &probNoise);
+      long getWeightNormCount() const;
+      vector<double> getEffectiveLengths();
+}; 
+
+#endif
diff --git a/Sampler.cpp b/Sampler.cpp
new file mode 100644
index 0000000..2f5fa19
--- /dev/null
+++ b/Sampler.cpp
@@ -0,0 +1,220 @@
+#ifdef DoSTATS
+#include<sys/time.h>
+#endif
+
+#include "Sampler.h"
+#include "common.h"
+
+// Constructor: zero all counters; no allocation happens until init().
+Sampler::Sampler(){ //{{{
+   m=samplesN=samplesLogged=samplesTotal=samplesOut=Nmap=Nunmap=0;
+   isoformLengths = NULL;
+#ifdef DoSTATS
+   tT=tTa=tZ=0;
+   nT=nTa=nZ=0;
+#endif
+}//}}}
+// Destructor: with DoSTATS enabled, print the timing statistics gathered
+// during sampling (counts, total minutes, average seconds per call).
+Sampler::~Sampler(){ //{{{
+#ifdef DoSTATS
+   message("---------------------------\nSTATISTICS:\n");
+   // BUGFIX: nT/nZ/nTa are long long; they were previously printed with %lg
+   // (expects double) resp. %ld, which is undefined behavior / wrong on
+   // platforms where long != long long. Use %lld for the counters.
+   message("Theta: %lld   %lgm   av:%lgs\n",nT,tT/60000.0,tT/1000.0/nT);
+   message("Z: %lld   %lgm   av:%lgs\n",nZ,tZ/60000.0,tZ/1000.0/nZ);
+   if(nTa>0)message("Theta Act: %lld   %lgm   av:%lgs\n",nTa,tTa/60000.0,tTa/1000.0/nTa);
+   message("Total time: %lgm\n",(tT+tZ)/60000.0);
+#endif
+}//}}}
+// Initialize the sampler: store problem dimensions and priors, seed the RNG,
+// and overwrite 'seed' with a freshly generated seed for the next consumer.
+void Sampler::init(long m, long samplesTotal, long samplesOut, long Nunmap,const TagAlignments *alignments, const distributionParameters &betaPar, const distributionParameters &dirPar, long &seed){//{{{
+//   this->n=n;
+   this->m=m;
+   this->samplesOut=samplesOut;
+   this->Nmap=alignments->getNreads();
+   this->Nunmap=Nunmap;
+   this->alignments=alignments;
+   beta=&betaPar;
+   dir=&dirPar;
+   //dir=new distributionParameters;
+   //dir->alpha=1.0/m;
+   //dir->beta=dirPar.beta;
+   rng_mt.seed(seed);
+   // Generate a new seed for the caller from the seeded RNG.
+   seed = (long) (1717171717.17*uniformDistribution(rng_mt));
+
+   resetSampler(samplesTotal);
+
+   // One theta/count entry per transcript (index 0 is the noise transcript).
+   theta.assign(m,0);
+   C.assign(m,0);
+}//}}}
+// Reset all per-iteration accumulators before a new sampling run.
+void Sampler::resetSampler(long samplesTotal){//{{{
+   this->samplesTotal=samplesTotal;
+   samplesN = 0;
+   samplesLogged = 0;
+   // Fraction of produced samples that should be logged ("thinning" rate).
+   logRate=(double)samplesOut/samplesTotal;
+   sumC0 = 0;
+   sumNorm.first = sumNorm.second = 0;
+   thetaSum.assign(m,pairD(0,0));
+   thetaSqSum.assign(m,pairD(0,0));
+}//}}}
+// Mean of C[0] (noise-transcript count) over the recorded samples.
+long Sampler::getAverageC0(){//{{{
+   return (long) (sumC0 / sumNorm.first);
+}//}}}
+// Fill av with per-transcript means: .first averages the raw theta samples,
+// .second averages the logit-transformed samples (see updateSums).
+void Sampler::getAverage(vector<pairD> &av){//{{{
+   long i;
+   if((long)av.size()<m)
+      av.assign(m,pairD(0,0));
+   for(i=0;i<m;i++){
+      if(sumNorm.first != 0)
+         av[i].first=thetaSum[i].first/sumNorm.first;
+      if(sumNorm.second != 0)
+         av[i].second=thetaSum[i].second/sumNorm.second;
+   }
+}//}}}
+// Mean (raw, logit) pair for transcript i; 0 when no samples were recorded.
+pairD Sampler::getAverage(long i){//{{{
+   double av1,av2;
+   av1=(sumNorm.first==0)?0:thetaSum[i].first/sumNorm.first;
+   av2=(sumNorm.second==0)?0:thetaSum[i].second/sumNorm.second;
+   return pairD(av1,av2);
+}//}}}
+// Sample variance of theta (raw, logit) for transcript i, using an
+// (n-1)-normalized estimator.
+// NOTE(review): when exactly one sample was recorded (sumNorm==1) the
+// divisor (sumNorm-1) is zero and the result is inf/NaN -- confirm callers
+// never query variance with a single sample.
+pairD Sampler::getWithinVariance(long i){//{{{
+   double va1,va2;
+   if(sumNorm.first==0)
+      va1=0;
+   else
+      va1=thetaSqSum[i].first/(sumNorm.first-1.0) - 
+           (thetaSum[i].first/(sumNorm.first-1.0))*
+           (thetaSum[i].first/sumNorm.first);
+   if(sumNorm.second==0)
+      va2=0;
+   else 
+      va2=thetaSqSum[i].second/(sumNorm.second-1.0) - 
+           (thetaSum[i].second/(sumNorm.second-1.0))*
+           (thetaSum[i].second/sumNorm.second);
+   // Diagnostic: a negative variance indicates numerical trouble.
+   if(va1<0)message("minus %lg %lg %lg\n",thetaSqSum[i].first,thetaSum[i].first,sumNorm.first);
+   return pairD(va1,va2);
+}//}}}
+// Export raw-theta sum of squares, sum, and sample count for transcript i.
+// Out-of-range i yields zeros.
+void Sampler::getThetaSums(long i, double *thSqSum, double *thSum, double *sumN){//{{{
+   if(i >= m){
+      (*thSqSum) = (*thSum) = (*sumN) = 0;
+      return;
+   }
+   *thSqSum = thetaSqSum[i].first;
+   *thSum = thetaSum[i].first;
+   *sumN = sumNorm.first;
+}//}}}
+// Convert current theta into tau: divide by isoform length and renormalize
+// over transcripts 1..m-1. Entry 0 (theta act / noise) is copied unchanged.
+void Sampler::getTau(vector<double> &tau, double norm){//{{{
+   double tauSum=0;
+
+   // tau must already be sized consistently with theta and isoformLengths.
+   if ((theta.size() > tau.size()) || (isoformLengths->size() != tau.size()))
+     error("Sampler failed");
+
+   tau.assign(tau.size(),0);
+
+   tau[0]=theta[0]; // set thetaAct
+   // divide by length:
+   for(size_t i=1;i<theta.size();i++){
+      tau[ i ] = theta[i] / (*isoformLengths)[ i ] * norm;
+      tauSum += tau[i];
+   }
+   // DO normalize:
+   for(size_t i=1;i<tau.size();i++)
+      if(tau[i]>0) tau[i] /= tauSum;
+}//}}}
+// Append the current sample to the output file in the configured units
+// ("counts", "rpkm", "theta" or "tau"); also logs theta[0] (theta act).
+// No-op unless saveSamples() enabled saving.
+void Sampler::appendFile(){//{{{
+   long i;
+   double norm=saveNorm;
+   if((!save) || (outFile == NULL))return;
+   thetaActLog.push_back(theta[0]);
+   outFile->precision(9);
+   (*outFile)<<scientific;
+   if(saveType == "counts"){
+      // Default normalization: total number of mapped reads.
+      if(norm == 0)norm = Nmap;
+      for(i=1;i<m;i++)
+         (*outFile)<<theta[i]*norm<<" ";
+   }else if(saveType == "rpkm"){
+      // Default normalization: 10^9 (reads per kilobase per million).
+      if(norm == 0)norm = 1000000000.0;
+      for(i=1;i<m;i++)
+         if((*isoformLengths)[i]>0)
+            (*outFile)<<theta[i]*norm/(*isoformLengths)[i]<<" ";
+         else
+            (*outFile)<<theta[i]*norm<<" ";
+   }else if(saveType == "theta"){
+      if(norm == 0)norm=1.0;
+      for(i=1;i<m;i++)
+         (*outFile)<<theta[i]*norm<<" ";
+   }else if(saveType == "tau"){
+      if(norm == 0)norm=1.0;
+      vector<double> tau(m);
+      getTau(tau,norm);
+      for(i=1;i<m;i++)
+         (*outFile)<<tau[i]<<" ";
+   }
+   (*outFile)<<endl;
+}//}}}
+// Accumulate the current sample into the running sums: raw theta in the
+// .first column, logit-transformed theta in the .second column.
+// NOTE(review): log(theta)-log(1-theta) is +-inf for theta of 0 or 1 --
+// presumably theta is always strictly inside (0,1) here; confirm.
+void Sampler::updateSums(){//{{{
+   long i;
+   double s;
+   for(i=0;i<m;i++){
+      thetaSum[i].first+=theta[i];
+      thetaSqSum[i].first+=theta[i]*theta[i];
+   }
+   sumC0+=C[0];
+   sumNorm.first++;
+   //if(doLog){
+   for(i=0;i<m;i++){
+      s = log(theta[i]) - log(1-theta[i]);//LOGIT
+      thetaSum[i].second += s;
+      thetaSqSum[i].second += s * s;
+   }
+   sumNorm.second++;
+   //}
+}//}}}
+// Enable saving of samples into outFile with the given type ("counts",
+// "rpkm", "theta", "tau") and normalization constant (0 = use default).
+// Stores the caller's pointers; see noSave() for the ownership caveat.
+void Sampler::saveSamples(ofstream *outFile, const vector<double> *isoformLengths, const string &saveType, double norm){//{{{
+   this->outFile = outFile;
+   this->isoformLengths = isoformLengths;
+   this->saveType = saveType;
+   saveNorm = norm;
+   save = true;
+   thetaActLog.clear();
+}//}}}
+// Disable sample saving.
+// NOTE(review): this deletes isoformLengths, which saveSamples() received
+// from the caller -- the sampler takes ownership of that heap vector.
+// Confirm all callers pass a heap-allocated vector they relinquish.
+void Sampler::noSave(){//{{{
+   save = false;
+   outFile = NULL;
+   if(isoformLengths){
+      delete isoformLengths;
+      isoformLengths = NULL;
+   }
+}//}}}
+
+// Draw theta[1..m-1] from the Dirichlet posterior by sampling independent
+// Gamma(alpha + C[i], beta) variates and normalizing by their sum.
+void Sampler::sampleTheta(){//{{{
+#ifdef DoSTATS
+   nT++;
+   struct timeval start, end;
+   gettimeofday(&start, NULL);
+#endif
+   vector<double> gamma(m,0);
+   double gammaSum=0;
+   long i;
+   for(i=1;i<m;i++){
+      gammaDistribution.param(gDP(dir->alpha + C[i], dir->beta));
+      gamma[i]=gammaDistribution(rng_mt);
+      gammaSum+=gamma[i];
+   }
+   if (gammaSum<=0) // at least something should be more than zero
+     error("Sampler failed");
+
+   for(i=1;i<m;i++){
+      theta[i]=gamma[i]/gammaSum;
+   }
+#ifdef DoSTATS
+   gettimeofday(&end, NULL);
+   tT += (end.tv_sec-start.tv_sec)*1000*1000+(end.tv_usec-start.tv_usec);
+#endif
+}//}}}
+// Base implementation only counts the sample; subclasses do the real work.
+void Sampler::sample(){//{{{
+   samplesN++;
+}//}}}
+// Decide whether the current sample should be logged ("thinning"): log when
+// the remaining samples are all needed, or when the target log rate is behind.
+void Sampler::update(){//{{{
+   doLog = false;
+   if(samplesOut-samplesLogged>0){
+      if(samplesTotal-samplesN<=samplesOut-samplesLogged)doLog=true;
+      else if((long)(logRate * samplesN) > samplesLogged)doLog=true;
+   }
+   if(doLog) samplesLogged ++;
+}//}}}
diff --git a/Sampler.h b/Sampler.h
new file mode 100644
index 0000000..40a56c8
--- /dev/null
+++ b/Sampler.h
@@ -0,0 +1,99 @@
+#ifndef SAMPLER_H
+#define SAMPLER_H
+
+#include<vector>
+#include<fstream>
+#include "boost/random/mersenne_twister.hpp"
+#include "boost/random/gamma_distribution.hpp"
+#include "boost/random/uniform_01.hpp"
+
+using namespace std;
+
+#include "GibbsParameters.h"
+#include "TagAlignments.h"
+
+// compute statistics
+//#define DoSTATS
+//#define DoDebug
+
+typedef pair<double,double> pairD;
+
+// Base class of the MCMC expression samplers (see GibbsSampler /
+// CollapsedSampler): owns the theta state, running sums and sample logging.
+class Sampler{
+   protected:
+   long m, samplesN, samplesLogged, samplesTotal, samplesOut, Nmap, Nunmap;
+   const distributionParameters *beta,*dir;
+   const TagAlignments *alignments;
+   const vector<double> *isoformLengths;
+   boost::random::mt11213b rng_mt;
+   boost::random::gamma_distribution<double> gammaDistribution;
+   typedef boost::random::gamma_distribution<double>::param_type gDP;
+   // Needed by child classes:
+   boost::random::uniform_01<double> uniformDistribution;
+   
+   bool doLog,save;
+   string saveType;
+   ofstream *outFile;
+   double saveNorm,logRate;
+#ifdef DoSTATS   
+   long long nT,nZ,nTa;
+   double tT,tZ,tTa;
+#endif
+
+   vector<long> C;
+   double sumC0;
+   vector<double> theta;
+   vector<double> thetaActLog;
+   vector<pairD> thetaSum;
+   vector<pairD> thetaSqSum;
+   pairD sumNorm;
+
+   // Sample theta.
+   void sampleTheta();
+   // Compute tau.
+   void getTau(vector <double> &tau, double norm);
+   // Append current expression samples into file opened for saving samples.
+   void appendFile();
+   // Update sums of theta and theta^2.
+   void updateSums();
+
+   public:
+
+   Sampler();
+   virtual ~Sampler();
+   // Initialize sampler, set seed and use it to generate new seed.
+   void init(long m, long samplesTotal, long samplesOut, long Nunmap,
+             const TagAlignments *alignments,
+             const distributionParameters &betaPar, 
+             const distributionParameters &dirPar, 
+             long &seed);
+   // Reset sampler's stats before new iteration
+   void resetSampler(long samplesTotal);
+   // Return mean C[0].
+   long getAverageC0();
+   // Get vector of mean theta expression. Has "two columns": the first is
+   // calculated from the raw samples, the second from their logit transform
+   // (see updateSums).
+   void getAverage(vector<pairD> &av);
+   // Get mean for transcript i.
+   pairD getAverage(long i);
+   // Get within variance for transcript i.
+   pairD getWithinVariance(long i);
+   // Get sum of theta^2, sum of theta, and their norm for transcript i.
+   void getThetaSums(long i, double *thSqSum, double *thSum, double *sumN);
+   // Return norms for theta sums.
+   pairD getSumNorms() const { return sumNorm; }
+   // Set sampler into state where samples are saved into the outFile.
+   void saveSamples(ofstream *outFile, const vector<double> *isoformLengths,
+                    const string &saveType, double norm = 0);
+   // Stop saving samples into the file.
+   void noSave();
+   // Get theta act logged values.
+   const vector<double>& getThetaActLog(){return thetaActLog;}
+
+   // Produce new McMc samples.
+   virtual void sample();
+   // If necessary ("thinned sample") sample theta; update sums.
+   virtual void update();
+};
+
+
+#endif
diff --git a/SimpleSparse.cpp b/SimpleSparse.cpp
new file mode 100644
index 0000000..35ba9d8
--- /dev/null
+++ b/SimpleSparse.cpp
@@ -0,0 +1,97 @@
+#include<cmath>
+#include<cstring>
+#ifdef _OPENMP
+#include<omp.h>
+#endif
+
+#include "SimpleSparse.h"
+
+// Numerically stable log(sum(exp(val[i]))) over the half-open range [st,en).
+// st<0 is clamped to 0; en==-1 (or en>T) means "to the end of val".
+// NOTE(review): an empty range returns 0 (not -inf) -- confirm callers expect this.
+double SimpleSparse::logSumExpVal(long st, long en) const{//{{{
+   if(st<0)st = 0;
+   if((en == -1) || (en > T)) en = T;
+   if(st >= en) return 0;
+   long i;
+   double sumE = 0, m = val[st];
+   // Shift by the maximum to avoid overflow in exp().
+   for(i = st; i < en; i++)if(val[i] > m)m = val[i];
+   for(i = st; i < en; i++)
+      sumE += exp(val[i] - m);
+   return  m + log(sumE);
+}//}}}
+// res[r] = sum of the stored values in row r; res must hold at least N doubles.
+void SimpleSparse::sumRows(double res[]) const{//{{{
+   long i,r;
+   for(r=0;r<N;r++){
+      res[r]=0;
+      for(i=rowStart[r];i<rowStart[r+1];i++){
+         res[r]+=val[i];
+      }
+   }
+}//}}}
+// res[c] = sum of the stored values in column c; res must hold at least M
+// doubles (zeroed here before accumulation).
+void SimpleSparse::sumCols(double res[]) const{//{{{
+   memset(res,0,M*sizeof(double));
+   for(long i=0;i<T;i++)res[col[i]]+=val[i];
+}//}}}
+// Count stored entries strictly greater than delta (parallel OpenMP reduction).
+long SimpleSparse::countAboveDelta(double delta) const{//{{{
+   long i,count=0;
+   #pragma omp parallel for reduction(+:count)
+   for(i=0;i<T;i++){
+      if(val[i]>delta)count++;
+   }
+   return count;
+}//}}}
+
+// Row-wise softmax: rewrites this matrix's values as log-softmax (in place)
+// and stores the softmax (exp of that) into res->val. res must share this
+// matrix's sparsity structure (same rowStart/col layout).
+void SimpleSparse::softmaxInplace(SimpleSparse *res){//{{{
+   double logRowSum = 0;
+   long i,r;
+   #pragma omp parallel for private(i,logRowSum)
+   for(r=0;r<N;r++){
+      logRowSum = logSumExpVal(rowStart[r],rowStart[r+1]);
+      for(i=rowStart[r];i<rowStart[r+1];i++){
+         val[i] = val[i] - logRowSum;
+         res->val[i] = exp( val[i] );
+      }
+   }
+}//}}}
+// Row-wise softmax into res->val, leaving this matrix unchanged. res must
+// share this matrix's sparsity structure.
+void SimpleSparse::softmax(SimpleSparse *res) const{//{{{
+   double logRowSum = 0;
+   long i,r;
+   #pragma omp parallel for private(i,logRowSum)
+   for(r=0;r<N;r++){
+      logRowSum = logSumExpVal(rowStart[r],rowStart[r+1]);
+      for(i=rowStart[r];i<rowStart[r+1];i++){
+         res->val[i] = exp(val[i] - logRowSum);
+      }
+   }
+}//}}}
+
+// Allocate an n x m sparse matrix with t stored entries. This instance owns
+// its col/rowStart index arrays (base == true).
+SimpleSparse::SimpleSparse(long n,long m, long t){//{{{
+   N=n;
+   M=m;
+   T=t;
+   val = new double[T];
+   base = true; // base matrix with it's own col & rowStart information
+   col = new int_least32_t[T];
+   rowStart = new int_least32_t[N+1];
+   //colStart = new long[M+1];
+}//}}}
+// Structure-sharing constructor: allocates only val and SHARES the col and
+// rowStart arrays of the base matrix m0 (base == false, so the destructor
+// will not free them). NOTE(review): m0 must outlive this object, otherwise
+// col/rowStart dangle.
+SimpleSparse::SimpleSparse(SimpleSparse *m0){//{{{
+   N=m0->N;
+   M=m0->M;
+   T=m0->T;
+   val = new double[T];
+   base = false; // use col & rowStart information from the base matrix m0
+   col = m0->col;
+   rowStart = m0->rowStart;
+   /*col = new long[T];
+   rowStart = new long[N+1];
+   memcpy(col, m0->col, T*sizeof(long));
+   memcpy(rowStart, m0->rowStart, (N+1)*sizeof(long));
+   */
+}//}}}
+// Free values; the index arrays are freed only by the owning (base) matrix.
+SimpleSparse::~SimpleSparse(){//{{{
+   delete[] val;
+   if(base){
+      // BEWARE there could be other matrices using this data 
+      delete[] col;
+      delete[] rowStart;
+   }
+}//}}}
diff --git a/SimpleSparse.h b/SimpleSparse.h
new file mode 100644
index 0000000..916dce5
--- /dev/null
+++ b/SimpleSparse.h
@@ -0,0 +1,27 @@
+#ifndef SIMPLESPARSE_H
+#define SIMPLESPARSE_H
+
+#include<stdint.h>
+
+//#define setVal(x,i,y) {for(i=0;i<x->T;i++)x->val[i]=y;}
+
+// Compressed-row sparse matrix of alignment probabilities (N reads x M
+// transcripts, T stored entries). Structure (col/rowStart) can be shared
+// between matrices; only the "base" instance frees it.
+class SimpleSparse {
+   private:
+   // True when this instance owns col/rowStart (see destructor).
+   bool base;
+   public:
+   long N,M,T; // reads, transcripts, total
+   // NOTE(review): colStart is declared but never allocated by the visible
+   // constructors -- do not dereference it.
+   int_least32_t *rowStart,*colStart,*col;
+   double *val;
+
+   SimpleSparse(long n,long m, long t);
+   // Structure-sharing constructor; m0 must outlive the new matrix.
+   SimpleSparse(SimpleSparse *m0);
+   ~SimpleSparse();
+   void softmax(SimpleSparse *res) const;
+   void softmaxInplace(SimpleSparse *res);
+   long countAboveDelta(double delta = 0.99) const;
+   void sumCols(double res[]) const;
+   void sumRows(double res[]) const;
+   double logSumExpVal(long st, long en) const;
+};
+
+#endif
diff --git a/TagAlignments.cpp b/TagAlignments.cpp
new file mode 100644
index 0000000..b0f0bde
--- /dev/null
+++ b/TagAlignments.cpp
@@ -0,0 +1,132 @@
+#include<cmath>
+
+#include "TagAlignments.h"
+
+#include "misc.h"
+
+#include "common.h"
+
+//#define MEM_USAGE
+
+// storeL: keep per-read alignment probabilities in log space (true), or
+// normalize them to linear space when each read is closed (see pushRead).
+TagAlignments::TagAlignments(bool storeL){//{{{
+   knowNtotal=false;
+   knowNreads=false;
+   Ntotal=0;
+   Nreads=0;
+   storeLog = storeL;
+}//}}}
+// Prepare for reading alignments; non-zero Nreads/Ntotal/M are used to
+// pre-reserve memory (zero means "unknown, grow on demand").
+void TagAlignments::init(long Nreads,long Ntotal, long M){//{{{
+   currentRead = 0;
+   reservedN = 0;
+   if(Nreads>0){
+      this->Nreads=Nreads;
+      knowNreads=true;
+      readIndex.reserve(Nreads+2);
+   }      
+   // readIndex[r] is the offset of read r's first alignment in trIds/probs.
+   readIndex.push_back(0);
+   
+   if(Ntotal>0){
+      this->Ntotal=Ntotal;
+      knowNtotal=true;
+      reservedN = Ntotal+1;
+      trIds.reserve(reservedN);
+      probs.reserve(reservedN);
+   }
+   if(M>0){
+      this->M=M;
+      // readsInIsoform[t] = index of the last read that aligned to t.
+      readsInIsoform.assign(M,-1);
+   }else{
+      readsInIsoform.clear();
+      this->M=0;
+   }
+}//}}}
+// Add an alignment given a linear-scale probability; non-positive values
+// are mapped to LOG_ZERO before delegating to pushAlignmentL.
+void TagAlignments::pushAlignment(long trId, double prob){//{{{
+   if(prob<=0)pushAlignmentL(trId, ns_misc::LOG_ZERO);
+   else pushAlignmentL(trId, log(prob));
+}//}}}
+// Add an alignment (log-probability) of the current read to transcript trId.
+// A duplicate alignment to the same transcript is merged via logAddExp.
+// When sizes are unknown, heuristically grows the reserved capacity based on
+// the alignment density observed so far.
+void TagAlignments::pushAlignmentL(long trId, double lProb){//{{{
+   if(trId>=M){
+      M=trId+1;
+      readsInIsoform.resize(M,-1);
+   }
+   if(readsInIsoform[trId] == currentRead){
+      // The read has already one alignment to this transcript.
+     for(long i=readIndex[currentRead];i<(long)trIds.size();i++)
+        if(trIds[i] == trId){
+           probs[i] = ns_math::logAddExp(probs[i], lProb);
+           break;
+        }
+   }else{
+      if(! knowNtotal){
+         // The size of the arrays is unknown; try to reserve a sensible
+         // amount of space if we at least know Nreads.
+         if(knowNreads && reservedN && ((long)probs.size() == reservedN)){
+            // we reached the size of reserved space
+            double dens = (double)probs.size() / currentRead; 
+            dens *= 1.05; //increase it by 5%
+            reservedN =(long)( reservedN + (dens) * (Nreads - currentRead + 1000.0) );
+         #ifdef MEM_USAGE
+            message("TagAlignments:\n   size: %ld  reserving: %ld  capacity before: %ld\n",probs.size(),reservedN,probs.capacity());
+         #endif
+            trIds.reserve(reservedN);
+            probs.reserve(reservedN);
+         #ifdef MEM_USAGE
+            message("   capacity after: %ld\n",probs.capacity());
+         #endif
+         }else if(knowNreads && (! reservedN) && (currentRead == Nreads / 4 )){
+            // one quarter in, try to reserve sensible amount of space
+            double dens = (double)probs.size() / currentRead; 
+            dens *= 1.05; //increase it by 5%
+            reservedN =(long)((dens) * (Nreads));
+         #ifdef MEM_USAGE
+            message("TagAlignments:\n   size: %ld  reserving: %ld  capacity before: %ld\n",probs.size(),reservedN,probs.capacity());
+         #endif
+            trIds.reserve(reservedN);
+            probs.reserve(reservedN);
+         #ifdef MEM_USAGE
+            message("   capacity after: %ld\n",probs.capacity());
+         #endif
+         }
+      }
+      trIds.push_back(trId);
+      probs.push_back(lProb);
+      // Mark that transcript trId already has alignment from this read.
+      readsInIsoform[trId] = currentRead;
+   }
+}//}}}
+// Close the current read: if it received any alignments, optionally convert
+// its probabilities from log space to normalized linear space and advance to
+// the next read. Reads with no alignments are silently skipped.
+void TagAlignments::pushRead(){//{{{
+   // Check whether there were any valid alignments added for this read:
+   if(readIndex[currentRead] == (int_least32_t) probs.size()){
+      // If no new alignments, do nothing.
+      return;
+   }
+   // If there are alignments transform from log space if necessary and move to next read.
+   if(!storeLog){
+      double logSum = ns_math::logSumExp(probs, readIndex[currentRead], probs.size());
+      for(long i = readIndex[currentRead]; i<(long)probs.size(); i++)
+         probs[i] = exp(probs[i]-logSum);
+   }
+   // Move to the next read.
+   currentRead++;
+   readIndex.push_back(probs.size());
+}//}}}
+// Finish the whole reading phase: fix the final transcript count, read count
+// and alignment count, and report them through the out-parameters.
+void TagAlignments::finalizeRead(long *M, long *Nreads, long *Ntotal){//{{{
+   *M = this->M = readsInIsoform.size();
+   *Nreads = this->Nreads = readIndex.size() - 1;
+   *Ntotal = this->Ntotal = probs.size();
+#ifdef MEM_USAGE
+   message("TagAlignments: readIndex size: %ld  capacity %ld\n",readIndex.size(),readIndex.capacity());
+   message("TagAlignments: probs size: %ld  capacity %ld\n",probs.size(),probs.capacity());
+#endif
+}//}}}
+// Transcript id of the i-th alignment; out-of-range i returns 0.
+int_least32_t TagAlignments::getTrId(long i) const {//{{{
+   if(i<Ntotal)return trIds[i];
+   return 0;
+}//}}}
+// Stored probability of the i-th alignment (log or linear, depending on
+// storeLog); out-of-range i returns 0.
+double TagAlignments::getProb(long i) const {//{{{
+   if(i<Ntotal)return probs[i];
+   return 0;
+}//}}}
+// Offset of read i's first alignment (readIndex has Nreads+1 entries, so
+// i == Nreads yields the end offset); out-of-range i returns 0.
+int_least32_t TagAlignments::getReadsI(long i) const {//{{{
+   if(i<=Nreads)return readIndex[i];
+   return 0;
+}//}}}
diff --git a/TagAlignments.h b/TagAlignments.h
new file mode 100644
index 0000000..691bbb3
--- /dev/null
+++ b/TagAlignments.h
@@ -0,0 +1,44 @@
+#ifndef TAGALIGNMENTS_H
+#define TAGALIGNMENTS_H
+
+#include<stdint.h>
+#include<vector>
+
+using namespace std;
+
+// Probabilities are stored in log scale.
+
+// Compressed per-read storage of read-to-transcript alignment probabilities,
+// filled incrementally with pushAlignment*/pushRead and closed by
+// finalizeRead.
+class TagAlignments{
+   private:
+      vector<int_least32_t> trIds;
+      vector<double> probs;
+      // readIndex[r] = offset of read r's first alignment; size Nreads+1.
+      vector<int_least32_t> readIndex;
+      // readsInIsoform[t] = last read that aligned to transcript t (dedup aid).
+      vector<int_least32_t> readsInIsoform;
+
+      bool storeLog,knowNtotal,knowNreads;
+      long M,Ntotal,Nreads,currentRead,reservedN;
+   public:
+      // Constructor, can specify whether the probabilities should be stored in log space.
+      TagAlignments(bool storeL = true);
+      // Initialize reader. For non-zero arguments, also reserves some memory.
+      void init(long Nreads = 0,long Ntotal = 0,long M = 0);
+      // Add alignment for currently processed read.
+      void pushAlignment(long trId, double prob);
+      // Add alignment for currently processed read, with probability in log scale.
+      void pushAlignmentL(long trId, double lProb);
+      // Finish processing current read and move onto new read. 
+      void pushRead();
+      // Finalizes reading reads and sets N, Nreads, Ntotal.
+      void finalizeRead(long *M, long *Nreads, long *Ntotal);
+      // Return TrID of i-th alignment.
+      int_least32_t getTrId(long i) const;
+      // Return alignment probability of i-th alignment as it is stored.
+      // (if it is stored in log space, return log-probability)
+      double getProb(long i) const;
+      // Get index for i-th read's alignments.
+      int_least32_t getReadsI(long i) const;
+      // Get number of reads.
+      long getNreads() const { return Nreads;}
+}; 
+
+#endif
diff --git a/TranscriptExpression.cpp b/TranscriptExpression.cpp
new file mode 100644
index 0000000..75deb04
--- /dev/null
+++ b/TranscriptExpression.cpp
@@ -0,0 +1,87 @@
+#include<algorithm>
+
+#include "TranscriptExpression.h"
+#include "FileHeader.h"
+#include "common.h"
+
+// Guess the expression file format from the file-name extension;
+// unrecognized extensions default to SAMPLER_MEANS.
+TE_FileType TranscriptExpression::guessFileType(const string &fileName){//{{{
+   string extension = fileName.substr(fileName.rfind(".")+1);
+   if(extension == "thetaMeans") return SAMPLER_MEANS;
+   if(extension == "m_alphas") return M_ALPHAS;
+   // Ends with 'mean' or 'variance' or 'var'
+   if((extension.rfind("mean") == extension.size() - 4) ||
+      (extension.rfind("variance") == extension.size() - 8) ||
+      (extension.rfind("var") == extension.size() - 3)) return MEAN_VARIANCE;
+   // Default is SAMPLER_MEANS.
+   return SAMPLER_MEANS;
+}//}}}
+// Default constructor: empty expression set.
+TranscriptExpression::TranscriptExpression(){//{{{
+   M=0;
+   logged=false;
+}//}}}
+// Convenience constructor: initialize members and load expression data from
+// the given file.
+TranscriptExpression::TranscriptExpression(const string &fileName, TE_FileType fileType){//{{{
+   // BUGFIX: the statement "TranscriptExpression();" previously used here
+   // constructs and discards a TEMPORARY object -- it does not initialize
+   // *this (delegating constructors need the C++11 initializer-list syntax),
+   // leaving M and logged uninitialized. Initialize the members directly.
+   M=0;
+   logged=false;
+   readExpression(fileName,fileType);
+}//}}}
+// Load per-transcript expression mean/variance from fileName in one of the
+// supported formats (guessed from the extension when fileType == GUESS).
+// Returns false (after reporting an error) on header or read failure.
+// NOTE(review): the varFile.bad() checks do not catch extraction failures
+// (those set failbit, not badbit) -- fail() would; confirm and consider fixing.
+bool TranscriptExpression::readExpression(const string &fileName, TE_FileType fileType){//{{{
+   long i;
+   if(fileType == GUESS)fileType = guessFileType(fileName);
+   ifstream varFile(fileName.c_str());
+   FileHeader fh(&varFile);
+   if((!fh.varianceHeader(&M,&logged))||(M==0)){
+      error("TranscriptExpression: Problem loading variance file %s\n",(fileName).c_str());
+      return false;
+   }
+   // M_ALPHAS file contains an extra noise transcript.
+   if(fileType == M_ALPHAS) M--;
+   trs.resize(M);
+   if(fileType == SAMPLER_MEANS){
+      double count,mean2;
+      for(i=0;i<M;i++){
+         varFile>>trs[i].id>>trs[i].exp>>count>>mean2>>trs[i].var;
+         // IDs in SAMPLER_MEANS file are shifted by 1
+         trs[i].id--;
+         varFile.ignore(1000,'\n');
+         if(varFile.bad()){
+            error("TranscriptExpression: Problem reading transcript %ld.\n",i);
+            return false;
+         }
+      }
+   }else if(fileType == MEAN_VARIANCE){
+      for(i=0;i<M;i++){
+         trs[i].id=i;
+         varFile>>trs[i].exp>>trs[i].var;
+         varFile.ignore(1000,'\n');
+         if(varFile.bad()){
+            error("TranscriptExpression: Problem reading transcript %ld.\n",i);
+            return false;
+         }
+      }
+   }else if(fileType == M_ALPHAS){
+      double alpha, beta, beta0;
+      // Skip first entry - noise transcript.
+      varFile>>trs[0].exp>>alpha>>beta0;
+      varFile.ignore(1000,'\n');
+      for(i=0;i<M;i++){
+         trs[i].id=i;
+         varFile>>trs[i].exp>>alpha>>beta;
+         // Beta0 is the sum of all except noise.
+         // Dirichlet marginal moments: mean alpha/beta0, Beta-like variance.
+         trs[i].exp = alpha / beta0;
+         trs[i].var = alpha * (beta0-alpha) / (beta0 * beta0 * (beta0 + 1));
+         varFile.ignore(1000,'\n');
+         if(varFile.bad()){
+            error("TranscriptExpression: Problem reading transcript %ld.\n",i);
+            return false;
+         }
+      }
+   }
+   fh.close();
+   return true;
+}//}}}
+// Sort transcripts by expression (operator< of trExpInfoT), ascending by
+// default, descending when reverse is true.
+void TranscriptExpression::doSort(bool reverse){//{{{
+   if(! reverse)
+      sort(trs.begin(),trs.end());
+   else
+      sort(trs.rbegin(),trs.rend());
+}//}}}
+
diff --git a/TranscriptExpression.h b/TranscriptExpression.h
new file mode 100644
index 0000000..c04c9e1
--- /dev/null
+++ b/TranscriptExpression.h
@@ -0,0 +1,38 @@
+#ifndef TRANSCRIPTEXPRESSION_H
+#define TRANSCRIPTEXPRESSION_H
+#include<vector>
+#include<string>
+#include<stdint.h>
+
+using namespace std;
+
+enum TE_FileType{ SAMPLER_MEANS, MEAN_VARIANCE , M_ALPHAS, GUESS };
+
+// Per-transcript expression record; ordered by expression value so that
+// doSort() can rank transcripts.
+struct trExpInfoT{
+   double exp,var;
+   int_least32_t id;
+   bool operator< (const trExpInfoT& d2) const{
+      return exp<d2.exp;
+   }
+};
+
+// Holds per-transcript expression means and variances loaded from one of the
+// supported file formats (see TE_FileType and readExpression).
+class TranscriptExpression{
+   private:
+      // Number of transcripts (excluding the noise entry of M_ALPHAS files).
+      long M;
+      // Whether the stored values are in log scale (from the file header).
+      bool logged;
+      vector<trExpInfoT> trs;
+      TE_FileType guessFileType(const string &fileName);
+
+   public:
+      TranscriptExpression();
+      TranscriptExpression(const string &fileName, TE_FileType fileType = SAMPLER_MEANS);
+      bool readExpression(const string &fileName, TE_FileType fileType = SAMPLER_MEANS);
+      void doSort(bool reverse = false);
+      long getM(){return M;}
+      bool isLogged(){return logged;}
+      // Accessors by position in trs (not by transcript id; see id()).
+      double exp(long tr){return trs[tr].exp;}
+      double var(long tr){return trs[tr].var;}
+      long id(long tr){return trs[tr].id;}
+};
+
+#endif
diff --git a/TranscriptInfo.cpp b/TranscriptInfo.cpp
new file mode 100644
index 0000000..224036d
--- /dev/null
+++ b/TranscriptInfo.cpp
@@ -0,0 +1,258 @@
+#include<fstream>
+#include<set>
+
+#include"TranscriptInfo.h"
+
+#include "common.h"
+
+// Write transcript info to fileName in the format readInfo() parses:
+// a "# M <count>" header, then one line per transcript:
+// <gene> <transcript> <length> <effective length>.
+// With force=false an existing file is left untouched and false is returned;
+// false is also returned when the file cannot be opened for writing.
+bool TranscriptInfo::writeInfo(string fileName, bool force) const{//{{{
+   ofstream trF;
+   if(! force){
+      // Do nothing if file exists.
+      ifstream testF(fileName.c_str());
+      if(testF.is_open()){
+         testF.close();
+         return false;
+      }
+      testF.close();
+   }
+   trF.open(fileName.c_str(),ios::out | ios::trunc);
+   if(! trF.is_open() ) return false;
+   trF<<"# M "<<M<<endl;
+   for(long i=0;i<M;i++)
+      trF<<transcripts[i].g<<" "<<transcripts[i].t<<" "<<transcripts[i].l<<" "<<transcripts[i].effL<<endl;
+   trF.close();
+   return true;
+}//}}}
+// Write per-gene summary (name, transcript count, mean transcript length)
+// to fileName, overwriting any existing file. Returns false only when the
+// file cannot be opened. Assumes setGeneInfo() has populated `genes`
+// (genes[i].m>0, otherwise the average divides by zero).
+bool TranscriptInfo::writeGeneInfo(string fileName) const{//{{{
+   ofstream geF;
+   geF.open(fileName.c_str(),ios::out | ios::trunc);
+   if(! geF.is_open() ) return false;
+   geF<<"# G "<<G<<endl;
+   geF<<"# <gene name> <# of transcripts> <average length>"<<endl;
+   double length;
+   for(long i=0;i<G;i++){
+      length = 0;
+      // Average the (plain, not effective) lengths of the gene's transcripts.
+      for(long j=0;j<genes[i].m;j++)length+=transcripts[genes[i].trs[j]].l;
+      geF<<genes[i].name<<" "<<genes[i].m<<" "<<length/genes[i].m<<endl;
+   }
+   geF.close();
+   return true;
+}//}}}
+// Populate transcript info from parallel arrays of gene names, transcript
+// names and lengths (all three must have equal size, else returns false).
+// Effective length is initialized to the plain length; gene records are
+// (re)built via setGeneInfo(). Note: existing `transcripts` entries are NOT
+// cleared here — this appends to whatever is already stored.
+bool TranscriptInfo::setInfo(vector<string> gNames,vector<string> tNames, vector<long> lengths){//{{{
+   // The sizes have to be equal.
+   if((gNames.size()!=tNames.size())||(tNames.size()!=lengths.size())) return false;
+   transcriptT newT;
+   M = (long) gNames.size();
+   // Create new entry for each transcript.
+   for(long i=0;i<M;i++){
+      newT.g=gNames[i];
+      newT.t=tNames[i];
+      newT.gI = 0;
+      newT.l=(int_least32_t)lengths[i];
+      newT.effL = lengths[i];
+      transcripts.push_back(newT);
+   }
+   // Initialize gene info based on gene names.
+   setGeneInfo();
+   isInitialized = true;
+   return isInitialized;
+}//}}}
+// Rebuild the `genes` vector from the gene names stored per transcript and
+// assign every transcript its gene index (gI). Sets groupedByGenes=false
+// when transcripts of one gene are not adjacent in the input order.
+// After this runs, `genes` holds G real entries plus one empty dummy at
+// index G (returned by getGtrs() for out-of-range queries).
+void TranscriptInfo::setGeneInfo(){//{{{
+   // Cleanup previous gene list.
+   genes.clear();
+   // Map of genes: name -> position within gene vector.
+   map<string,long> names;
+   geneT tmpG;
+   long gi=0,i;
+   groupedByGenes = true;
+   string previousName = "!-noname-!";
+   for(i=0;i<M;i++){
+      // If gene name same as previous, then just add new transcript.
+      if(transcripts[i].g == previousName){
+         transcripts[i].gI = gi;
+         genes[gi].m++;
+         genes[gi].trs.push_back(i);
+      }else{
+         previousName=transcripts[i].g;
+         // Check whether the gene name is new or was seen before.
+         if(names.count(transcripts[i].g) == 0){
+            // Prepare entry for new gene, starting with one (current) transcript.
+            tmpG.name = transcripts[i].g;
+            tmpG.m = 1;
+            tmpG.trs = vector<long>(1,i);
+            // Add entry to the gene list.
+            genes.push_back(tmpG);
+            // Set current gene index.
+            gi=genes.size()-1;
+            transcripts[i].gI = gi;
+            // Map gene name to it's index and update previousName.
+            names[transcripts[i].g] = gi;
+         }else{
+            // If gene name was seen before then transcripts are not grouped by genes.
+            groupedByGenes=false;
+            //warning("TranscriptInfo: Transcripts of gene %ld are not grouped.\n",transcripts[i].g);
+            gi = names[transcripts[i].g];
+            transcripts[i].gI = gi;
+            genes[gi].m++;
+            genes[gi].trs.push_back(i);
+         }
+      }
+   }
+   G = genes.size();
+   // Add empty record to the end.
+   tmpG.name = "";
+   tmpG.m = 0;
+   tmpG.trs.clear();
+   genes.push_back(tmpG);
+}//}}}
+// Default constructor: empty, uninitialized info.
+// NOTE(review): noName is left default-empty here; only the fileName
+// constructor sets it to "wrongID" — confirm whether that is intended.
+TranscriptInfo::TranscriptInfo(){ clearTranscriptInfo(); }
+// Reset the object to its empty state: no transcripts, no genes,
+// not initialized, grouped-by-genes flag back to its default (true).
+void TranscriptInfo::clearTranscriptInfo(){//{{{
+   M=G=0;
+   isInitialized=false;
+   groupedByGenes=true;
+   transcripts.clear();
+   genes.clear();
+}//}}}
+// Construct directly from a transcript-info file.
+// The commented-out "TranscriptInfo();" below would NOT delegate to the
+// default constructor anyway (it would only create a discarded temporary);
+// state is initialized because readInfo() calls clearTranscriptInfo() first.
+TranscriptInfo::TranscriptInfo(string fileName){//{{{
+   noName="wrongID";
+   // TranscriptInfo();
+   readInfo(fileName);
+}//}}}
+// Parse a transcript-info file. Lines starting with '#' are skipped; each
+// record is "<gene> <transcript> <length> [<effective length>]". When the
+// effective-length column is absent, the plain length is used instead.
+// Clears any previous content, rebuilds gene info, and returns false only
+// when the file cannot be opened (malformed trailing lines are dropped).
+bool TranscriptInfo::readInfo(string fileName){//{{{
+   clearTranscriptInfo();
+   ifstream trFile(fileName.c_str());
+   if(!trFile.is_open()){
+      error("TranscriptInfo: problem reading transcript file.\n");
+      return false;
+   }
+   transcriptT newT;
+   // Read all lines of file ignoring lines starting with #.
+   while(trFile.good()){
+      while(trFile.good() && (trFile.peek()=='#'))
+         trFile.ignore(100000000,'\n');
+      if(!trFile.good()) break;
+      // Read gene name, tr name and length.
+      trFile>>newT.g>>newT.t>>newT.l;
+      newT.gI = 0;
+      // Should not hit EOF or any other error yet.
+      if(!trFile.good()) break;
+      // Read effective length if present:
+      while((trFile.peek() == '\t')||(trFile.peek() == ' ')) trFile.get();
+      // If end of line is reached then use length as effective length.
+      if((trFile.good()) && (trFile.peek() == '\n')) newT.effL = newT.l;
+      else trFile>>newT.effL;
+      // If the line was OK, then push new entry (EOF when looking for effective length is allowed).
+      if(!trFile.fail())
+         transcripts.push_back(newT);
+      // Ignore rest of the line.
+      trFile.ignore(100000000,'\n');
+   }
+   trFile.close();
+   isInitialized = true;
+   M = (long)transcripts.size();
+   setGeneInfo();
+   return isInitialized;
+}//}}}
+// Number of transcripts.
+long TranscriptInfo::getM() const{//{{{
+   return M;
+}//}}}
+// Number of genes (set by setGeneInfo()).
+long TranscriptInfo::getG() const{//{{{
+   return G;
+}//}}}
+// Transcript indices belonging to gene i. Out-of-range i yields the empty
+// dummy record stored at genes[G] (appended by setGeneInfo()); note i==G
+// also reaches that same dummy through the normal return path.
+const vector<long> &TranscriptInfo::getGtrs(long i) const{//{{{
+   if((i>G) || (i<0)){
+      // Return empty record.
+      return genes[G].trs;
+   }
+   return genes[i].trs;
+}//}}}
+// Effective length of transcript i; 0 when uninitialized or i out of range.
+double TranscriptInfo::effL(long i) const{//{{{
+   if(isInitialized && (i<M))return transcripts[i].effL;
+   return 0;
+}//}}}
+// Plain length of transcript i; 0 when uninitialized or i out of range.
+long TranscriptInfo::L(long i) const{//{{{
+   if(isInitialized && (i<M))return transcripts[i].l;
+   return 0;
+}//}}}
+// Name of transcript i; the noName sentinel when unavailable.
+const string &TranscriptInfo::trName(long i) const{//{{{
+   if(isInitialized && (i<M))return transcripts[i].t;
+   return noName;
+}//}}}
+// Gene name of transcript i; the noName sentinel when unavailable.
+const string &TranscriptInfo::geName(long i) const{//{{{
+   if(isInitialized && (i<M))return transcripts[i].g;
+   return noName;
+}//}}}
+// Gene index of transcript i; -1 when uninitialized or i out of range.
+long TranscriptInfo::geId(long i) const{//{{{
+   if(isInitialized && (i<M))return transcripts[i].gI;
+   return -1;
+}//}}}
+// Replace each transcript's effective length with the supplied values.
+// Silently no-ops (with a warning) when the array size does not match M.
+// The rescaling of effective lengths to the plain-length scale is computed
+// but deliberately disabled (norm forced to 1) — see commented line below.
+void TranscriptInfo::setEffectiveLength(vector<double> effL){//{{{
+   if((long)effL.size() != M){
+      warning("TranscriptInfo: Wrong array size for effective length adjustment.\n");
+      return;
+   }
+   // Adjust effective length to similar scale as normal length
+   double sumL = 0,sumN = 0,norm;
+   for(long i=0;i<M;i++){
+      sumN+=effL[i];
+      sumL+=transcripts[i].l;
+   }
+// don't normalize
+//   norm = sumL / sumN;
+   norm = 1;
+   for(long i=0;i<M;i++){
+      transcripts[i].effL = effL[i] * norm;
+   }
+}//}}}
+// Return a newly allocated vector of M+1 lengths where transcript i is
+// stored at index i+1 (index 0 is left zero for the noise transcript).
+// effective=true uses effective lengths. Caller owns and must delete it.
+vector<double> *TranscriptInfo::getShiftedLengths(bool effective) const{//{{{
+   vector<double> *Ls = new vector<double>(M+1);
+   for(long i=0;i<M;i++){
+      if(effective)(*Ls)[i+1] = transcripts[i].effL;
+      else (*Ls)[i+1] = transcripts[i].l;
+   }
+   return Ls;
+}//}}}
+// Replace all transcript names with trList (positional). Fails (returns
+// false) when the list size differs from M or the new names are not unique;
+// in either case nothing is modified.
+bool TranscriptInfo::updateTrNames(const vector<string> &trList){//{{{
+   if((long)trList.size() != M)return false;
+   // Check uniqueness of new names.
+   set<string> trSet(trList.begin(),trList.end());
+   if((long)trSet.size() != M)return false;
+   for(long i=0;i<M;i++){
+      transcripts[i].t = trList[i];
+   }
+   return true;
+}//}}}
+// Replace gene names positionally from geneList and rebuild gene records.
+// Returns false (with a warning, nothing modified) on size mismatch.
+bool TranscriptInfo::updateGeneNames(const vector<string> &geneList){//{{{
+   if((long)geneList.size() != M){
+      warning("TranscriptInfo: Number of items in gene list (%ld) does not match number of transcripts (%ld).",geneList.size(),M);
+      return false;
+   }
+   // Copy gene names in the order they are.
+   for(long i=0;i<M;i++){
+      transcripts[i].g = geneList[i];
+   }
+   // Initialize gene info.
+   setGeneInfo();
+   return true;
+}//}}}
+// Replace gene names by looking each transcript name up in a
+// transcript->gene map, then rebuild gene records. Fails (returns false,
+// nothing modified) when the map is smaller than M or any transcript is
+// missing from it; extra map entries are allowed.
+bool TranscriptInfo::updateGeneNames(const map<string,string> &trGeneList){//{{{
+   if((long)trGeneList.size() < M){
+      warning("TranscriptInfo: Number of items in TR->GE map (%ld) is less than the number of transcripts (%ld).",trGeneList.size(),M);
+      return false;
+   }
+   // Check all transcripts have associated gene name.
+   for(long i=0;i<M;i++){
+      if(!trGeneList.count(transcripts[i].t)){
+         warning("TranscriptInfo: No gene name for transcript [%s].",transcripts[i].t.c_str());
+         return false;
+      }
+   }
+   // Set gene names.
+   for(long i=0;i<M;i++){
+      transcripts[i].g = trGeneList.find(transcripts[i].t)->second;
+   }
+   // Initialize gene info.
+   setGeneInfo();
+   return true;
+}//}}}
diff --git a/TranscriptInfo.h b/TranscriptInfo.h
new file mode 100644
index 0000000..03d3391
--- /dev/null
+++ b/TranscriptInfo.h
@@ -0,0 +1,75 @@
+#ifndef TRANSCRIPTINFO_H
+#define TRANSCRIPTINFO_H
+#include<string>
+#include<vector>
+#include<map>
+#include<stdint.h>
+
+using namespace std;
+
+// One transcript: gene name (g), transcript name (t), length (l),
+// gene index (gI, set by setGeneInfo) and effective length (effL).
+struct transcriptT {//{{{
+   string g,t;
+   int_least32_t l,gI;
+   double effL;
+   // Order by gene name first, transcript name second.
+   bool operator< (const transcriptT& d2) const{
+      if(g==d2.g)return t<d2.t;
+      return g<d2.g;
+   }
+};//}}}
+
+// One gene: name, transcript count (m) and indices of its transcripts.
+struct geneT {//{{{
+   string name;
+   int_least32_t m;
+   vector<long> trs;
+};//}}}
+
+// Holds per-transcript metadata (gene, name, length, effective length) and
+// the derived per-gene grouping. Read/write via readInfo()/writeInfo().
+class TranscriptInfo{
+   private:
+      // Number of transcripts, genes.
+      long M,G;
+      // Flags.
+      bool isInitialized, groupedByGenes;
+      // Transcript information:
+      // gene name, transcript name, length, effective length
+      vector<transcriptT> transcripts;
+      // Gene information:
+      // name, number of transcripts, list of transcripts
+      // Length is G+1 after initialization (with dummy record at the end).
+      vector<geneT> genes;
+      // Populate genes variable with gene information based on gene names saved in transcript information.
+      void setGeneInfo();
+      // Sentinel returned by trName()/geName() for invalid indices.
+      string noName;
+
+   public:
+      TranscriptInfo();
+      // Clears all information.
+      void clearTranscriptInfo();
+      TranscriptInfo(string fileName);
+      // Read info from a file name.
+      // Header (# M <num>) is ignored. The file is read until EOF.
+      bool readInfo(string fileName);
+      // Write transcript into into file. Does not overwrite existing file unless force=true.
+      bool writeInfo(string fileName, bool force = false) const;
+      // Write per-gene summary (name, #transcripts, average length).
+      bool writeGeneInfo(string fileName) const;
+      // Populate from parallel name/length arrays (sizes must match).
+      bool setInfo(vector<string> gNames, vector<string> tNames, vector<long> lengths);
+      bool isOK() const{ return isInitialized; }
+      long getM() const;
+      long getG() const;
+      const vector<long> &getGtrs(long i) const;
+      long L(long i) const;
+      double effL(long i) const;
+      const string &trName(long i) const;
+      const string &geName(long i) const;
+      long geId(long i) const;
+      // True when transcripts of each gene were adjacent in the input.
+      bool genesOrdered() const{ return groupedByGenes; }
+      void setEffectiveLength(vector<double> effL);
+      // Return pointer to a vector of lengths with transcript IDs starting from 1.
+      // Caller owns the returned vector.
+      vector<double> *getShiftedLengths(bool effective = false) const;
+      // Update transcript names with new names from the list.
+      bool updateTrNames(const vector<string> &trList);
+      // Sets gene names to transcripts and calls setGeneInfo to initialize gene information.
+      bool updateGeneNames(const vector<string> &geneList);
+      bool updateGeneNames(const map<string,string> &trGeneList);
+};
+
+#endif
diff --git a/TranscriptSequence.cpp b/TranscriptSequence.cpp
new file mode 100644
index 0000000..b60d4bc
--- /dev/null
+++ b/TranscriptSequence.cpp
@@ -0,0 +1,197 @@
+#include<algorithm>
+#include<fstream>
+#include<set>
+#include<sstream>
+
+#include "TranscriptSequence.h"
+
+#include "misc.h"
+
+#include "common.h"
+
+// Number of times we randomly probe for old cache record.
+// CR: #define WORST_SEARCH_N 10
+
+// Default constructor: empty sequence store, no gene names.
+// (CR-marked lines belong to the removed caching machinery.)
+TranscriptSequence::TranscriptSequence(){//{{{
+   // CR: srand(time(NULL));
+   M=0;
+   cM=0;
+   gotGeneNames=false;
+   // CR: useCounter = 0;
+}//}}}
+// Construct and immediately read sequences from fileName.
+// BUG(review): "TranscriptSequence();" constructs and discards a TEMPORARY —
+// it does not delegate (pre-C++11), so members such as cM are never
+// initialized for this object; readSequence() happens to set M and
+// gotGeneNames, masking most of the problem. Confirm and fix upstream.
+TranscriptSequence::TranscriptSequence(string fileName, refFormatT format){//{{{
+   TranscriptSequence();
+   readSequence(fileName,format);
+}//}}}
+// Index a FASTA file: record the stream position of each transcript's
+// sequence and, when possible, extract gene (and for GENCODE also
+// transcript) names from the '>' header lines. GENCODE headers are
+// '|'-separated (field 0 = transcript, field 1 = gene); STANDARD headers
+// are scanned for a "gene:" or "gene=" token. Extraction is abandoned for
+// the whole file on the first header that does not match. Finishes by
+// loading all sequences into memory via loadSequence(); the stream is kept
+// open as a member (fastaF). Returns false on open/read failure.
+bool TranscriptSequence::readSequence(string fileName, refFormatT format){//{{{
+   fastaF.open(fileName.c_str());
+   if(!fastaF.is_open()){
+      error("TranscriptSequence: problem reading transcript file.\n");
+      return false;
+   }
+   trSeqInfoT newTr;
+   // CR: newTr.lastUse=0;
+   // CR: newTr.cache=-1;
+   string trDesc,geneName;
+   long pos;
+   istringstream geneDesc;
+   trNames.clear();
+   geneNames.clear();
+   gotGeneNames = true;
+   // Record trNames only from gencode ref.
+   gotTrNames = (format == GENCODE);
+   while(fastaF.good()){
+      while((fastaF.peek()!='>')&&(fastaF.good()))
+         fastaF.ignore(1000,'\n');
+      if(! fastaF.good())break;
+      // Read description line:
+      getline(fastaF, trDesc, '\n');
+      // look for gene name if previous lines had gene name:
+      if(gotGeneNames){
+         if(format == GENCODE){
+            vector<string> lineTokens = ns_misc::tokenize(trDesc,"|");
+            if(lineTokens.size()>1){
+               geneNames.push_back(lineTokens[1]);
+               // Field 0 still carries the leading '>', hence substr(1).
+               trNames.push_back(lineTokens[0].substr(1));
+            }else{
+               gotGeneNames = false;
+               gotTrNames = false;
+            }
+         }else{ // format == STANDARD
+            // Accept either "gene:<name>" or "gene=<name>" in the header.
+            pos=min(trDesc.find("gene:"),trDesc.find("gene="));
+            if(pos!=(long)string::npos){
+               geneDesc.clear();
+               geneDesc.str(trDesc.substr(pos+5));
+               geneDesc >> geneName;
+               geneNames.push_back(geneName);
+            }else{
+               gotGeneNames = false;
+            }
+         }
+      }
+      // remember position:
+      newTr.seek=fastaF.tellg();
+      trs.push_back(newTr);
+   }
+   // Exit if there was an error while reading the file.
+   if(fastaF.bad()){
+      error("TranscriptSequence: problem reading file.\n");
+      return false;
+   }
+   M = trs.size();
+   // Allocate cache for all.
+   cache.resize(M);
+   //cache.resize(min(M,(long)TRS_CACHE_MAX));
+   //cachedTrs.resize(min(M,(long)TRS_CACHE_MAX));
+   // Clear eof flag from input stream.
+   fastaF.clear();
+   return loadSequence();
+}//}}}
+// Load every transcript's sequence into cache[] by seeking to the positions
+// recorded in readSequence() and concatenating lines until the next '>'
+// header or EOF. Returns false on a stream error for any transcript.
+bool TranscriptSequence::loadSequence(){//{{{
+   cache.resize(M);
+   string seqLine;
+   for(long tr=0;tr<M;tr++){
+      // Set input stream to transcript's position.
+      fastaF.seekg(trs[tr].seek);
+      // Read line by line until reaching EOF or next header line '>'.
+      while((fastaF.peek()!='>')&&( getline(fastaF,seqLine,'\n').good())){
+         cache[tr]+=seqLine;
+      }
+      if(fastaF.bad()){
+         error("TranscriptSequence: Failed reading transcript %ld\n",tr);
+         return false;
+      }
+      // Clear flags (just in case).
+      fastaF.clear();
+   }
+   return true;
+}//}}}
+// Number of UNIQUE gene names (0 when names were not extracted); counts by
+// building a temporary set on every call.
+long TranscriptSequence::getG() const{//{{{
+   if(!gotGeneNames)return 0;
+   return (set<string>(geneNames.begin(),geneNames.end())).size();
+}//}}}
+// Reference to transcript tr's full sequence; the empty noneTr string is
+// returned for an out-of-range index (NOT a null pointer).
+const string &TranscriptSequence::getTr(long tr) const{//{{{
+   if((tr<0)||(tr>=M))return noneTr;
+   // Return pointer to the sequence in cache.
+   return cache[tr];
+   /* Used with cacheing. {{{
+   // Update last use info.
+   trs[tr].lastUse = useCounter++;
+   return cache[acquireSequence(tr)];
+   }}} */
+}//}}}
+// Extract l bases of transcript trI starting at (0-based) position start.
+// Positions outside the sequence are padded with 'N' on either side, so the
+// result is always intended to have length l. With doReverse=true the
+// reverse complement is returned (A<->T, C<->G; other characters, including
+// the padding Ns, are left as-is).
+// NOTE(review): when start<0 and l+start<=0, substr's length argument wraps
+// (negative long -> huge size_t) and the result exceeds l; presumably
+// callers never request a window entirely left of the sequence — verify.
+string TranscriptSequence::getSeq(long trI,long start,long l,bool doReverse) const{//{{{
+   // Return empty string for unknown transcript.
+   if((trI<0)||(trI>=M))return "";
+   /* Used with cacheing. {{{
+   // Update last use info.
+   trs[tr].lastUse = useCounter++;
+   // Get position within cache.
+   long trI = acquireSequence(tr);
+   }}} */
+   
+   // If position is not within the sequence, return Ns.
+   if(start>=(long)cache[trI].size())return string(l,'N');
+
+   string ret;
+   // Copy appropriate sequence, fill up the rest with Ns.
+   if(start<0){
+      ret.assign(-start,'N');
+      ret+=cache[trI].substr(0,l+start);
+   }else{
+      ret = cache[trI].substr(start,l);
+      if(((long)ret.size()) < l)ret.append(l-ret.size(), 'N');
+   }
+
+   if(!doReverse){
+      return ret;
+   }else{
+      // For reverse return reversed string with complemented bases.
+      reverse(ret.begin(),ret.end());
+      for(long i=0;i<l;i++)
+         if((ret[i]=='A')||(ret[i]=='a'))ret[i]='T';
+         else if((ret[i]=='T')||(ret[i]=='t'))ret[i]='A';
+         else if((ret[i]=='C')||(ret[i]=='c'))ret[i]='G';
+         else if((ret[i]=='G')||(ret[i]=='g'))ret[i]='C';
+      return ret;
+   }
+}//}}}
+/* long TranscriptSequence::acquireSequence(long tr){//{{{
+   // If the sequence is stored in cache then just return it's cache index.
+   if(trs[tr].cache!=-1)return trs[tr].cache;
+   long i,newP,j;
+   // See if cache is full.
+   if(cM<TRS_CACHE_MAX){
+      // If cache limit not reached, just add new sequence.
+      newP=cM;
+      cM++;
+   }else{
+      // If cache is full, look at WORST_SEARCH_N positions and choose the one least used.
+      newP=rand()%cM;
+      for(i=0;i<WORST_SEARCH_N;i++){
+         j=rand()%cM;
+         if(trs[cachedTrs[newP]].lastUse > trs[cachedTrs[j]].lastUse)newP=j;
+      }
+      // "remove" the transcript from position newP from cache.
+      trs[cachedTrs[newP]].cache=-1;
+      cache[newP].clear();
+   }
+   // Set input stream to transcript's position.
+   fastaF.seekg(trs[tr].seek);
+   string seqLine;
+   // Read line by line until reaching EOF or next header line '>'.
+   while((fastaF.peek()!='>')&&( getline(fastaF,seqLine,'\n').good())){
+      cache[newP]+=seqLine;
+   }
+   if(fastaF.bad()){
+      error("TranscriptSequence: Failed reading transcript %ld\n",tr);
+      return 0;
+   }
+   // Clear flags.
+   fastaF.clear();
+   // Update cache information.
+   cachedTrs[newP]=tr;
+   trs[tr].cache=newP;
+   // Return transcripts index within cache.
+   return newP;
+}//}}} */
diff --git a/TranscriptSequence.h b/TranscriptSequence.h
new file mode 100644
index 0000000..c6dc85f
--- /dev/null
+++ b/TranscriptSequence.h
@@ -0,0 +1,82 @@
+#ifndef TRANSCRIPTSEQUENCE_H
+#define TRANSCRIPTSEQUENCE_H
+#include<fstream>
+#include<stdint.h>
+#include<string>
+#include<vector>
+
+using namespace std;
+
+/*
+   Lines commented with CR: -> cache related.
+   This was commented out when cacheing was removed.
+*/
+
+// Max number f transcripts to be cached at a time.
+// CR: #define TRS_CACHE_MAX 200000
+
+// Per-transcript index entry: stream position of the sequence start in the
+// FASTA file (cache fields were removed with the caching code).
+struct trSeqInfoT{
+   streampos seek;
+// CR: long cache;
+// CR: uint_least64_t lastUse;
+};
+
+enum refFormatT { STANDARD, GENCODE };
+
+/*
+TranscriptSequence class manages fasta file with transcript sequence.
+// CR: Only up to TRS_CACHE_MAX transcripts are "cached" at a time.
+*/
+// TranscriptSequence class manages a FASTA file with transcript sequences;
+// all sequences are currently kept in memory (caching was removed).
+class TranscriptSequence{
+   private:
+      // Total number of transcripts and number of cached transcripts.
+      long M,cM;
+      // Flag indicating whether it was possible to obtain gene names from the reference file.
+      bool gotGeneNames,gotTrNames;
+      // Transcript names.
+      vector<string> trNames;
+      // Gene names for each transcript.
+      vector<string> geneNames;
+      // Transcript cache information: seek position, use and cache position.
+      vector<trSeqInfoT> trs;
+      // Cache of transcript sequences.
+      vector<string> cache;
+      // Input stream for the fasta file.
+      ifstream fastaF;
+      // Empty transcript.
+      string noneTr;
+
+      // Counter for the least recently used entry.
+      // CR: uint_least64_t useCounter;
+      // IDs of transcripts currently in the cache (same order as cache).
+      // CR: vector<long> cachedTrs;
+      // Read transcript sequence from the file, save to cache and return it's cache index.
+      // CR: long acquireSequence(long tr);
+
+      // Load every indexed sequence into cache (called by readSequence).
+      bool loadSequence();
+   public:
+      TranscriptSequence();
+      // Initialize class and calls readSequence(fileName).
+      TranscriptSequence(string fileName, refFormatT format = STANDARD);
+      // Process input file fileName and record beginning of each transcript.
+      bool readSequence(string fileName, refFormatT format = STANDARD);
+      // Return number of transcripts.
+      long getM() const{ return M; }
+      // Return number of UNIQUE gene names.
+      long getG() const;
+      // Return reference to the transcript sequence. The reference is not persistent.
+      // For an unknown transcript a reference to an empty string is returned.
+      const string &getTr(long tr) const;
+      // Return sequence from transcript <tr> starting from <start> of length <l>.
+      string getSeq(long trI, long start, long l,bool doReverse = false) const; 
+      // Reports whether transcript names were extracted from the sequence file.
+      bool hasTrNames() const{ return gotTrNames; }
+      // Reports whether gene names were extracted from the sequence file.
+      bool hasGeneNames() const{ return gotGeneNames; }
+      // Return reference to const vector containing the geneNames.
+      const vector<string> &getGeneNames() const{ return geneNames; }
+      // Return reference to const vector of transcript names.
+      const vector<string> &getTrNames() const{ return trNames; }
+};
+
+#endif
diff --git a/VariationalBayes.cpp b/VariationalBayes.cpp
new file mode 100644
index 0000000..edf495f
--- /dev/null
+++ b/VariationalBayes.cpp
@@ -0,0 +1,384 @@
+#include<fstream>
+#include<iomanip>
+#include<cmath>
+#include<cstring>
+#ifdef _OPENMP
+#include<omp.h>
+#endif
+#include "asa103/asa103.hpp"
+#include "boost/random/normal_distribution.hpp"
+#include "boost/random/gamma_distribution.hpp"
+
+#include "VariationalBayes.h"
+
+#include "common.h"
+
+#define SWAPD(x,y) {tmpD=x;x=y;y=tmpD;}
+#define ZERO_LIMIT 1e-12
+
+typedef boost::random::gamma_distribution<double>::param_type gDP;
+
+// Configure convergence logging: file name and (optional, non-owned) timer
+// used by optimize() when compiled with LOG_CONV.
+void VariationalBayes::setLog(string logFileName,MyTimer *timer){//{{{
+   this->logFileName=logFileName;
+   this->logTimer=timer;
+}//}}}
+VariationalBayes::VariationalBayes(SimpleSparse *_beta,double *_alpha,long seed,long procN){//{{{
+/*
+ As bitseq_vb::__init__(self, alpha, beta) in python
+ Python difference:
+  - python version excludes beta.data <= 1e-40
+ _beta holds LOG alignment probabilities and is borrowed (not deleted here).
+ _alpha (Dirichlet prior, length M) is adopted: the destructor delete[]s it,
+ so the caller must hand over ownership or pass NULL to get a flat prior.
+*/
+   quiet = false;
+   logFileName = "tmp.convLog";
+   logTimer = NULL;
+#ifdef SUPPORT_OPENMP
+   omp_set_num_threads(procN);
+#endif
+   long i;
+   beta=_beta;
+   N=beta->N;
+   M=beta->M;
+   T=beta->T;
+   
+   //logBeta= new SimpleSparse(beta);
+   //beta already contains log probabilities.
+
+   if(_alpha){
+      alpha = _alpha;
+   }else{
+      // Flat prior: alpha_i = 1 for all transcripts.
+      alpha = new double[M];
+      for(i=0;i<M;i++)alpha[i]=1.;
+   }
+   phiHat = new double[M];
+   digA_pH = new double[M];
+   
+   rng_mt.seed(seed);
+   boost::random::normal_distribution<long double> normalD;
+   //typedef boost::random::normal_distribution<long double>::param_type nDP;
+   //normalD.param(nDP(0,1));
+
+   // Initialize phi_sm with standard-normal noise, then derive phi/phiHat.
+   phi_sm = new SimpleSparse(beta);
+   for(i=0;i<T;i++)phi_sm->val[i] = normalD(rng_mt);
+   phi = new SimpleSparse(beta);
+   // PyDif make phi a copy of phi_sm <- not important because of unpack() coming up next
+   
+   unpack(phi_sm->val); //unpack(pack()); 
+
+   // Constant part of the variational lower bound (depends only on alpha, N).
+   double alphaS=0,gAlphaS=0;
+   for(i=0;i<M;i++){
+      alphaS+=alpha[i];
+      gAlphaS+=lgamma(alpha[i]);
+   }
+   boundConstant = lgamma(alphaS) - gAlphaS - lgamma(alphaS+N);
+}//}}}
+// Releases owned state. Note alpha is delete[]d even when it was supplied
+// by the caller (the constructor adopts it); beta is NOT deleted — the
+// caller retains ownership of the sparse alignment matrix.
+VariationalBayes::~VariationalBayes(){//{{{
+   delete[] alpha;
+   delete[] phiHat;
+   delete[] digA_pH;
+   delete phi_sm;
+   delete phi;
+}//}}}
+// Load a new point into the variational state: phi_sm <- vals (or vals+adds
+// elementwise when adds!=NULL), then phi = softmax(phi_sm) per row with
+// phi_sm rewritten as log(phi), and phiHat = column sums of phi.
+void VariationalBayes::unpack(double vals[],double adds[]){//{{{
+   if(adds==NULL){
+      // Skip the copy when vals already IS the internal buffer.
+      if(vals!=phi_sm->val)memcpy(phi_sm->val,vals,T*sizeof(double));
+   }else{
+      long i;
+      #pragma omp parallel for
+      for(i=0;i<T;i++)phi_sm->val[i] = vals[i]+adds[i];
+   }
+   phi_sm->softmaxInplace(phi); //softmax  phi_sm into phi; and set phi_sm to log(phi)
+   phi->sumCols(phiHat); // sumCols of phi into phiHat
+}//}}}
+
+// Compute the NEGATIVE gradient of the bound w.r.t. the unconstrained
+// parameters into res[T]. Uses digamma(alpha_m + phiHat_m) per transcript
+// (errors from the asa103 digamma are accumulated and reported once).
+void VariationalBayes::negGradient(double res[]){//{{{
+   long i;
+   int err=0,totalError=0;
+   #pragma omp parallel for private(err) reduction(+:totalError)
+   for(i=0;i<M;i++){
+      digA_pH[i]=digama(alpha[i]+phiHat[i], &err);
+      totalError += err;
+   }
+   if(totalError){error("VariationalBayes: Digamma error (%d).\n",totalError); }
+   // beta is logged now
+   #pragma omp parallel for
+   for(i=0;i<T;i++)res[i]= - (beta->val[i] - phi_sm->val[i] - 1.0 + digA_pH[beta->col[i]]);
+}//}}}
+// Evaluate the variational lower bound on the model log-likelihood at the
+// current phi/phiHat: sum(phi*log beta) - entropy term + sum lgamma(alpha+
+// phiHat) + boundConstant. Entries with phi <= ZERO_LIMIT are skipped in
+// the entropy term to avoid 0*log(0) issues.
+double VariationalBayes::getBound(){//{{{
+   // the lower bound on the model likelihood
+   double A=0,B=0,C=0;
+   long i;
+   #pragma omp parallel for reduction(+:A,B)
+   for(i=0;i<T;i++){
+      // beta is logged now.
+      A += phi->val[i] * beta->val[i];
+      // PyDif use nansum instead of ZERO_LIMIT (nansum sums all elements treating NaN as zero
+      if(phi->val[i]>ZERO_LIMIT){
+         // phi_sm holds log(phi) after unpack(), so this is phi*log(phi).
+         B += phi->val[i] * phi_sm->val[i];
+      }
+   }
+   #pragma omp parallel for reduction(+:C)
+   for(i=0;i<M;i++){
+      C += lgamma(alpha[i]+phiHat[i]);
+   }
+   return A+B+C+boundConstant;
+}//}}}
+
+// Maximize the variational bound by (nonlinear) conjugate gradients in the
+// natural-gradient direction. `method` selects the conjugate-update formula
+// (steepest descent, Polak-Ribiere, Fletcher-Reeves or Hestenes-Stiefel);
+// whenever a conjugate step would decrease the bound, the iteration falls
+// back to steepest descent, and if even that decreases the bound the step
+// is undone and the loop terminates. Stops on bound decrease, |dBound|<=
+// ftol, gradient norm <= gtol, or maxIter iterations. With LOG_CONV defined
+// progress is appended to logFileName (see setLog()).
+void VariationalBayes::optimize(bool verbose,OPT_TYPE method,long maxIter,double ftol, double gtol){//{{{
+   bool usedSteepest;
+   long iteration=0,i,r;
+   double boundOld,bound,squareNorm,squareNormOld=1,valBeta=0,valBetaDiv,natGrad_i,gradGamma_i,phiGradPhiSum_r;
+   double *gradPhi,*natGrad,*gradGamma,*searchDir,*tmpD,*phiOld;
+   gradPhi=natGrad=gradGamma=searchDir=tmpD=phiOld=NULL;
+   MyTimer timer;
+   // allocate stuff {{{
+   //SimpleSparse *phiGradPhi=new SimpleSparse(beta);
+   gradPhi = new double[T];
+   // phiOld = new double[T]; will use gradPhi memory for this
+   phiOld = NULL;
+   natGrad = new double[T];
+   if(method == OPTT_HS)
+      gradGamma = new double[T];
+   searchDir = new double[T];
+   //searchDirOld = new double[T];
+   //phiGradPhi_sum = new double[N];
+   // }}}
+#ifdef LOG_CONV
+   ofstream logF(logFileName.c_str());
+   logF.precision(15);
+   logF<<"# iter bound squareNorm time(m) [M*means M*vars]"<<endl;
+   if(logTimer)logTimer->setQuiet();
+   #ifdef LONG_LOG
+   vector<double> dirAlpha(M);
+   #endif
+#endif
+   boundOld=getBound();
+   timer.start();
+   while(true){
+      negGradient(gradPhi);
+      // "yuck"
+      //setVal(phiGradPhi,i,phi->val[i]*gradPhi[i]);
+      //phiGradPhi->sumRows(phiGradPhi_sum);
+      // removed need for phiGradPhi matrix:
+      // removed need for phiGradPhi_sum
+      /*for(r=0;r<N;r++){
+         phiGradPhi_sum[r] = 0;
+         for(i=phi->rowStart[r];i<phi->rowStart[r+1];i++) phiGradPhi_sum[r] += phi->val[i] * gradPhi[i];
+      }*/
+
+      // set natGrad & gradGamma
+      squareNorm=0;
+      valBeta = 0;
+      valBetaDiv = 0;
+      // Per read r: project the gradient onto the simplex (natural gradient)
+      // and accumulate the PR/HS numerators/denominator in the same pass.
+      #pragma omp parallel for private(i,phiGradPhiSum_r,natGrad_i,gradGamma_i) reduction(+:squareNorm,valBeta,valBetaDiv)
+      for(r=0;r<N;r++){
+         phiGradPhiSum_r = 0;
+         for(i = phi->rowStart[r]; i < phi->rowStart[r+1]; i++) 
+            phiGradPhiSum_r += phi->val[i] * gradPhi[i];
+         
+         for(i = phi->rowStart[r]; i < phi->rowStart[r+1]; i++){
+            natGrad_i = gradPhi[i] - phiGradPhiSum_r;
+            gradGamma_i = natGrad_i * phi->val[i];
+            squareNorm += natGrad_i * gradGamma_i;
+            
+            if(method==OPTT_PR){
+               valBeta += (natGrad_i - natGrad[i])*gradGamma_i;
+            }
+            if(method==OPTT_HS){
+               valBeta += (natGrad_i-natGrad[i])*gradGamma_i;
+               valBetaDiv += (natGrad_i-natGrad[i])*gradGamma[i];
+               gradGamma[i] = gradGamma_i;
+            }
+            natGrad[i] = natGrad_i;
+         }
+      }
+      
+      // Periodic restart (and plain steepest descent) use valBeta=0.
+      if((method==OPTT_STEEPEST) || (iteration % (N*M)==0)){
+         valBeta=0;
+      }else if(method==OPTT_PR ){
+         // already computed:
+         // valBeta=0;
+         // for(i=0;i<T;i++)valBeta+= (natGrad[i]-natGradOld[i])*gradGamma[i];
+         valBeta /= squareNormOld;
+      }else if(method==OPTT_FR ){
+         valBeta = squareNorm / squareNormOld;
+      }else if(method==OPTT_HS ){
+         // already computed:
+         //valBeta=div=0;
+         //for(i=0;i<T;i++){
+         //   valBeta += (natGrad[i]-natGradOld[i])*gradGamma[i];
+         //   div += (natGrad[i]-natGradOld[i])*gradGammaOld[i];
+         //}
+         if(valBetaDiv!=0)valBeta /= valBetaDiv;
+         else valBeta = 0;
+      }
+
+      if(valBeta>0){
+         usedSteepest = false;
+         //for(i=0;i<T;i++)searchDir[i]= -natGrad[i] + valBeta*searchDirOld[i];
+         // removed need for searchDirOld:
+         #pragma omp parallel for
+         for(i=0;i<T;i++)
+            searchDir[i]= -natGrad[i] + valBeta*searchDir[i];
+      }else{
+         usedSteepest = true;
+         #pragma omp parallel for
+         for(i=0;i<T;i++)
+            searchDir[i]= -natGrad[i];
+      }
+
+      //try conjugate step
+      // gradPhi's buffer is reused to hold the previous point (phiOld).
+      SWAPD(gradPhi,phiOld);
+      memcpy(phiOld,phi_sm->val,T*sizeof(double)); // memcpy(phiOld,pack(),T*sizeof(double));
+      unpack(phiOld,searchDir);
+      bound = getBound();
+      iteration++;
+      // make sure there is an increase in L, else revert to steepest
+      if((bound<boundOld) && (valBeta>0)){
+         usedSteepest = true;
+         #pragma omp parallel for
+         for(i=0;i<T;i++)
+            searchDir[i]= -natGrad[i];
+         unpack(phiOld,searchDir);
+         bound = getBound();
+         // this should not be increased: iteration++;
+      }
+      if(bound<boundOld) { // If bound decreased even after using steepest, step back and quit.
+         unpack(phiOld);
+      }
+      SWAPD(gradPhi,phiOld);
+      if(verbose){
+         #ifdef SHOW_FIXED
+            messageF("iter(%c): %5.ld  bound: %.3lf grad: %.7lf  beta: %.7lf  fixed: %ld\n",(usedSteepest?'s':'o'),iteration,bound,squareNorm,valBeta,phi->countAboveDelta(0.999));
+         #else
+            messageF("iter(%c)[%5.lds]: %5.ld  bound: %.3lf grad: %.7lf  beta: %.7lf\n",(usedSteepest?'s':'o'),(long)timer.getTime(),iteration,bound,squareNorm,valBeta);
+         #endif
+      }else if(!quiet){
+         messageF("\riter(%c): %5.ld  bound: %.3lf grad: %.7lf  beta: %.7lf      ",(usedSteepest?'s':'o'),iteration,bound,squareNorm,valBeta);
+      }
+#ifdef LOG_CONV
+   if((iteration%100==0) ||
+      ((iteration<500) && (iteration%50==0)) ||
+      ((iteration<150) && (iteration%10==0)) ||
+      ((iteration<50) && (iteration%5==0))){
+      logF<<iteration<<" "<<bound<<" "<<squareNorm;
+      if(logTimer)logF<<" "<<logTimer->current(0,'m');
+      #ifdef LONG_LOG
+      double alphaSum = 0, alphaVarNorm;
+      // True 'alpha' - Dirichlet parameter is alpha+phiHat.
+      for(i=1;i<M;i++){
+         dirAlpha[i] = alpha[i] + phiHat[i];
+         alphaSum += dirAlpha[i];
+      }
+      for(i=1;i<M;i++)logF<< " " << dirAlpha[i] / alphaSum;
+      alphaVarNorm = alphaSum*alphaSum*(alphaSum+1);
+      for(i=1;i<M;i++)logF<<" "<<dirAlpha[i]*(alphaSum-dirAlpha[i])/alphaVarNorm;
+      #endif
+      logF<<endl;
+   }
+#endif
+
+      // convergence check {{{
+      if(bound<boundOld){
+         message("\nEnd: bound decrease\n");
+         break;
+      }
+      if(abs(bound-boundOld)<=ftol){
+         message("\nEnd: converged (ftol)\n");
+         break;
+      }
+      if(squareNorm<=gtol){
+         message("\nEnd: converged (gtol)\n");
+         break;
+      }
+      if(iteration>=maxIter){
+         message("\nEnd: maxIter exceeded\n");
+         break;
+      }
+      // }}}
+      // store essentials {{{
+      squareNormOld=squareNorm;
+      boundOld=bound;
+      // }}}
+      R_INTERUPT;
+   }
+   if(quiet){
+      messageF("iter(%c): %5.ld  bound: %.3lf grad: %.7lf  beta: %.7lf\n",(usedSteepest?'s':'o'),iteration,bound,squareNorm,valBeta);
+   }
+#ifdef LOG_CONV
+   logF<<iteration<<" "<<bound<<" "<<squareNorm;
+   if(logTimer)logF<<" "<<logTimer->current(0,'m');
+   #ifdef LONG_LOG
+   double alphaSum = 0, alphaVarNorm;
+   // True 'alpha' - Dirichlet parameter is alpha+phiHat.
+   for(i=1;i<M;i++){
+      dirAlpha[i] = alpha[i] + phiHat[i];
+      alphaSum += dirAlpha[i];
+   }
+   for(i=1;i<M;i++)logF<< " " << dirAlpha[i] / alphaSum;
+   alphaVarNorm = alphaSum*alphaSum*(alphaSum+1);
+   for(i=1;i<M;i++)logF<<" "<<dirAlpha[i]*(alphaSum-dirAlpha[i])/alphaVarNorm;
+   #endif
+   logF<<endl;
+   if(logTimer)logTimer->setVerbose();
+   logF.close();
+#endif
+   // free memory {{{
+   //delete phiGradPhi;
+   delete[] gradPhi;
+   delete[] natGrad;
+   if(method == OPTT_HS)
+      delete[] gradGamma;
+   delete[] searchDir;
+   //delete[] searchDirOld;
+   //delete[] phiGradPhi_sum;
+   // }}}
+}//}}}
+
+// Return the posterior Dirichlet parameters alpha_m + phiHat_m as a newly
+// allocated array of length M; caller owns and must delete[] it.
+double *VariationalBayes::getAlphas(){//{{{
+   double *alphas = new double[M];
+   for(long i=0;i<M;i++)alphas[i] = alpha[i] + phiHat[i];
+   return alphas;
+}//}}}
+
+// Draw samplesN samples from the fitted Dirichlet posterior (via per-
+// transcript Gammas normalized by their sum) and write them to *outF, one
+// sample per line, skipping transcript 0 (noise). outTypeS selects the
+// scale: "counts" multiplies by N (mapped reads), "rpkm" divides by
+// isoform length and scales by 1e9, anything else yields proportions.
+// Takes ownership of isoformLengths and deletes it before returning.
+// NOTE(review): the early return on too-few isoform lengths skips that
+// delete and leaks the vector — confirm callers cannot hit this path.
+void VariationalBayes::generateSamples(long samplesN, const string &outTypeS, const vector<double> *isoformLengths, ofstream *outF) {//{{{
+   vector<double> gamma(M,0);
+   vector<gDP> alphaParam;
+   boost::random::gamma_distribution<double> gammaDistribution;
+   long n,m;
+   double gammaSum, norm, normC = 1.0;
+   // Set normalisation.
+   if(outTypeS == "counts") normC = N; // N is Nmap.
+   if(outTypeS == "rpkm") normC = 1e9;
+   // Pre-compute Dirichlet's alpha and save them as parameters for Gamma.
+   for(m=0;m<M;m++)alphaParam.push_back(gDP(alpha[m] + phiHat[m], 1.0));
+   // Sample.
+   outF->precision(9);
+   (*outF)<<scientific;
+   for(n=0;n<samplesN;n++){
+      // Compute M gammas and sum. Ignore 0 - noise transcript.
+      gammaSum = 0;
+      for(m=1;m<M;m++){
+         gammaDistribution.param(alphaParam[m]);
+         gamma[m] = gammaDistribution(rng_mt);
+         gammaSum += gamma[m];
+      }
+      // For rpkm normalize by length.
+      if(outTypeS == "rpkm"){
+         if((long)isoformLengths->size() < M){
+            error("VariationalBayes: Too few isoform lengths for RPKM computation.");
+            return;
+         }
+         for(m=1;m<M;m++)
+            if((*isoformLengths)[m]>0)
+               gamma[m] /= (*isoformLengths)[m];
+      }
+      norm = normC / gammaSum;
+      for(m=1;m < M;m++){
+         (*outF)<<gamma[m] * norm<<" ";
+      }
+      (*outF)<<endl;
+      R_INTERUPT;
+   }
+   // Delete lengths.
+   delete isoformLengths;
+}//}}}
diff --git a/VariationalBayes.h b/VariationalBayes.h
new file mode 100644
index 0000000..50ff026
--- /dev/null
+++ b/VariationalBayes.h
@@ -0,0 +1,45 @@
+#ifndef VARIATIONALBAYES_H
+#define VARIATIONALBAYES_H
+
+#include "boost/random/mersenne_twister.hpp"
+
+#include "MyTimer.h"
+#include "SimpleSparse.h"
+
+// Optional compile-time logging/debugging switches:
+//#define LOG_CONV
+//#define LONG_LOG
+//#define SHOW_FIXED
+
+// Optimisation method for optimize(). The names suggest steepest descent and
+// the Polak-Ribiere / Fletcher-Reeves / Hestenes-Stiefel conjugate-gradient
+// updates — confirm against the implementation in VariationalBayes.cpp.
+enum OPT_TYPE { OPTT_STEEPEST, OPTT_PR, OPTT_FR, OPTT_HS};
+
+// Variational Bayes inference of transcript expression.
+class VariationalBayes {
+   private:
+      long N,M,T; // N - number of reads (Nmap); M - number of transcripts; T - TODO(review): confirm meaning
+      double * alpha; // prior over expression
+      double * phiHat; // per-transcript sums of phi (see unpack: phi_hat=sumOverCols(phi))
+      double * digA_pH; // presumably digamma(alpha + phiHat) — confirm in .cpp
+      double boundConstant; // constant term of the variational bound
+      SimpleSparse *beta,*phi_sm,*phi;
+      // logBeta replaced by logging beta itself
+      string logFileName; // convergence-log file name (set via setLog)
+      MyTimer *logTimer;  // timer used when logging convergence
+      // Mersenne Twister random number generator
+      boost::random::mt11213b rng_mt;
+      bool quiet; // suppress progress output when true
+
+   public:
+      VariationalBayes(SimpleSparse *_beta,double *_alpha=NULL,long seed = 0,long procN = 1);
+      ~VariationalBayes();
+      //double *pack(){return phi_sm->val;} 
+      void unpack(double vals[], double adds[] = NULL); // set phi_m, phi=softmax(phi_m), phi_hat=sumOverCols(phi)
+      // Writes the negative gradient of the bound into res.
+      void negGradient(double res[]);
+      // Current value of the variational lower bound.
+      double getBound();
+      void optimize(bool verbose=false, OPT_TYPE method=OPTT_STEEPEST,long maxIter=10000,double ftol=1e-5, double gtol=1e-5);
+      // Returns newly allocated array alpha[i]+phiHat[i]; caller frees.
+      double *getAlphas();
+      void setLog(string logFileName,MyTimer *timer);
+      // Generates samples from the distribution. The 0 (noise) transcript is left out.
+      void generateSamples(long samplesN, const string &outTypeS, const vector<double> *isoformLengths, ofstream *outF);
+      void beQuiet(){ quiet = true; }
+};
+
+#endif
diff --git a/_release_Makefile b/_release_Makefile
new file mode 100644
index 0000000..469ec77
--- /dev/null
+++ b/_release_Makefile
@@ -0,0 +1,122 @@
+CXX = g++
+ARCH = -mtune=generic
+VERSION = 0.7.5
+#	ARCH = -march=core2
+#	ARCH = -march=native
+
+
+COFLAGS = $(ARCH) -O2 -pipe
+CXXFLAGS = -DBS_VERSION=\"$(VERSION)\" -Wall $(COFLAGS)
+LDFLAGS = -Wl,-gc-sections
+BOOSTFLAGS = -I .
+OPENMP = -fopenmp -DSUPPORT_OPENMP
+
+PROGRAMS = \
+   convertSamples \
+   estimateDE \
+   estimateExpression \
+   estimateHyperPar \
+   estimateVBExpression \
+   extractSamples \
+   getFoldChange \
+   getGeneExpression \
+   getPPLR \
+   getVariance \
+   getWithinGeneExpression \
+   parseAlignment \
+   transposeLargeFile
+
+all: $(PROGRAMS)
+
+COMMON_DEPS = ArgumentParser.o common.o FileHeader.o misc.o MyTimer.o
+# PROGRAMS:
+convertSamples: convertSamples.cpp $(COMMON_DEPS) TranscriptInfo.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) convertSamples.cpp $(COMMON_DEPS) TranscriptInfo.o -o convertSamples
+
+estimateDE: estimateDE.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(LDFLAGS) estimateDE.cpp $(COMMON_DEPS) PosteriorSamples.o -o estimateDE
+
+estimateExpression: estimateExpression.cpp $(COMMON_DEPS) CollapsedSampler.o GibbsParameters.o GibbsSampler.o Sampler.o TagAlignments.o TranscriptInfo.o transposeFiles.o
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(OPENMP) $(LDFLAGS) estimateExpression.cpp $(COMMON_DEPS) CollapsedSampler.o GibbsParameters.o GibbsSampler.o Sampler.o TagAlignments.o TranscriptInfo.o transposeFiles.o -o estimateExpression
+
+estimateHyperPar: estimateHyperPar.cpp $(COMMON_DEPS) lowess.o PosteriorSamples.o TranscriptExpression.o 
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(LDFLAGS) estimateHyperPar.cpp $(COMMON_DEPS) lowess.o PosteriorSamples.o TranscriptExpression.o -o estimateHyperPar
+
+estimateVBExpression: estimateVBExpression.cpp $(COMMON_DEPS) SimpleSparse.o TagAlignments.o TranscriptInfo.o transposeFiles.o VariationalBayes.o
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(OPENMP) $(LDFLAGS) estimateVBExpression.cpp $(COMMON_DEPS) SimpleSparse.o TagAlignments.o TranscriptInfo.o transposeFiles.o VariationalBayes.o -o estimateVBExpression
+
+extractSamples: extractSamples.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) extractSamples.cpp $(COMMON_DEPS) PosteriorSamples.o -o extractSamples
+
+getFoldChange: getFoldChange.cpp $(COMMON_DEPS) PosteriorSamples.o 
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getFoldChange.cpp $(COMMON_DEPS) PosteriorSamples.o -o getFoldChange
+
+getGeneExpression: getGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o -o getGeneExpression
+
+getPPLR: getPPLR.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getPPLR.cpp $(COMMON_DEPS) PosteriorSamples.o -o getPPLR
+
+getVariance: getVariance.cpp $(COMMON_DEPS) PosteriorSamples.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getVariance.cpp $(COMMON_DEPS) PosteriorSamples.o -o getVariance
+
+getWithinGeneExpression: getWithinGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) getWithinGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o -o getWithinGeneExpression
+
+parseAlignment: parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o samtools/sam.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o
+	$(CXX) $(CXXFLAGS) $(OPENMP) $(LDFLAGS) -pthread parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o samtools/*.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o -lz -o parseAlignment
+
+transposeLargeFile: transposeLargeFile.cpp $(COMMON_DEPS) transposeFiles.o
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) transposeLargeFile.cpp $(COMMON_DEPS) transposeFiles.o -o transposeLargeFile
+
+# LIBRARIES:
+ArgumentParser.o: ArgumentParser.cpp ArgumentParser.h
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c ArgumentParser.cpp
+
+CollapsedSampler.o: CollapsedSampler.cpp CollapsedSampler.h GibbsParameters.h Sampler.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -c CollapsedSampler.cpp
+
+FileHeader.o: common.h misc.h FileHeader.cpp FileHeader.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -ffunction-sections -fdata-sections -c FileHeader.cpp
+
+GibbsSampler.o: GibbsSampler.cpp GibbsSampler.h GibbsParameters.h Sampler.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -c GibbsSampler.cpp
+
+misc.o: ArgumentParser.h PosteriorSamples.h misc.cpp misc.h
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c misc.cpp
+
+MyTimer.o: MyTimer.h MyTimer.cpp
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c MyTimer.cpp
+
+PosteriorSamples.o: PosteriorSamples.cpp PosteriorSamples.h FileHeader.h
+	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c PosteriorSamples.cpp
+
+ReadDistribution.o: ReadDistribution.cpp ReadDistribution.h TranscriptExpression.h TranscriptInfo.h TranscriptSequence.h 
+	$(CXX) $(CXXFLAGS) $(OPENMP) -c ReadDistribution.cpp
+
+Sampler.o: Sampler.cpp Sampler.h GibbsParameters.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -c Sampler.cpp
+
+SimpleSparse.o: SimpleSparse.cpp SimpleSparse.h
+	$(CXX) $(CXXFLAGS) $(OPENMP) -c SimpleSparse.cpp
+
+VariationalBayes.o: VariationalBayes.cpp VariationalBayes.h SimpleSparse.h
+	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) $(OPENMP) -c VariationalBayes.cpp 
+
+common.o: common.cpp common.h
+GibbsParameters.o: ArgumentParser.h GibbsParameters.cpp GibbsParameters.h
+lowess.o: lowess.cpp lowess.h
+TagAlignments.o: TagAlignments.cpp TagAlignments.h
+TranscriptExpression.o: TranscriptExpression.cpp TranscriptExpression.h
+TranscriptInfo.o: TranscriptInfo.cpp TranscriptInfo.h
+TranscriptSequence.o: TranscriptSequence.cpp TranscriptSequence.h
+transposeFiles.o: transposeFiles.cpp transposeFiles.h FileHeader.h
+
+# EXTERNAL LIBRARIES:
+samtools/sam.o:
+	make --directory samtools
+
+# CLEAN:
+# .PHONY keeps the rule working even if a file named "clean" exists;
+# -f makes the rule succeed when some objects were never built.
+.PHONY: clean
+clean:
+	rm -f samtools/*.o *.o $(PROGRAMS)
+
diff --git a/asa103/LICENSE.txt b/asa103/LICENSE.txt
new file mode 100644
index 0000000..65c5ca8
--- /dev/null
+++ b/asa103/LICENSE.txt
@@ -0,0 +1,165 @@
+                   GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+  This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+  0. Additional Definitions.
+
+  As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+  "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+  An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+  A "Combined Work" is a work produced by combining or linking an
+Application with the Library.  The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+  The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+  The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+  1. Exception to Section 3 of the GNU GPL.
+
+  You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+  2. Conveying Modified Versions.
+
+  If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+   a) under this License, provided that you make a good faith effort to
+   ensure that, in the event an Application does not supply the
+   function or data, the facility still operates, and performs
+   whatever part of its purpose remains meaningful, or
+
+   b) under the GNU GPL, with none of the additional permissions of
+   this License applicable to that copy.
+
+  3. Object Code Incorporating Material from Library Header Files.
+
+  The object code form of an Application may incorporate material from
+a header file that is part of the Library.  You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+   a) Give prominent notice with each copy of the object code that the
+   Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the object code with a copy of the GNU GPL and this license
+   document.
+
+  4. Combined Works.
+
+  You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+   a) Give prominent notice with each copy of the Combined Work that
+   the Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the Combined Work with a copy of the GNU GPL and this license
+   document.
+
+   c) For a Combined Work that displays copyright notices during
+   execution, include the copyright notice for the Library among
+   these notices, as well as a reference directing the user to the
+   copies of the GNU GPL and this license document.
+
+   d) Do one of the following:
+
+       0) Convey the Minimal Corresponding Source under the terms of this
+       License, and the Corresponding Application Code in a form
+       suitable for, and under terms that permit, the user to
+       recombine or relink the Application with a modified version of
+       the Linked Version to produce a modified Combined Work, in the
+       manner specified by section 6 of the GNU GPL for conveying
+       Corresponding Source.
+
+       1) Use a suitable shared library mechanism for linking with the
+       Library.  A suitable mechanism is one that (a) uses at run time
+       a copy of the Library already present on the user's computer
+       system, and (b) will operate properly with a modified version
+       of the Library that is interface-compatible with the Linked
+       Version.
+
+   e) Provide Installation Information, but only if you would otherwise
+   be required to provide such information under section 6 of the
+   GNU GPL, and only to the extent that such information is
+   necessary to install and execute a modified version of the
+   Combined Work produced by recombining or relinking the
+   Application with a modified version of the Linked Version. (If
+   you use option 4d0, the Installation Information must accompany
+   the Minimal Corresponding Source and Corresponding Application
+   Code. If you use option 4d1, you must provide the Installation
+   Information in the manner specified by section 6 of the GNU GPL
+   for conveying Corresponding Source.)
+
+  5. Combined Libraries.
+
+  You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+   a) Accompany the combined library with a copy of the same work based
+   on the Library, uncombined with any other library facilities,
+   conveyed under the terms of this License.
+
+   b) Give prominent notice with the combined library that part of it
+   is a work based on the Library, and explaining where to find the
+   accompanying uncombined form of the same work.
+
+  6. Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/asa103/asa103.hpp b/asa103/asa103.hpp
new file mode 100644
index 0000000..98638f5
--- /dev/null
+++ b/asa103/asa103.hpp
@@ -0,0 +1,96 @@
+# include <cmath>
+
+//****************************************************************************80
+
+double digama ( double x, int *ifault )
+
+//****************************************************************************80
+//
+//  Purpose:
+//
+//    DIGAMA calculates DIGAMMA ( X ) = d ( LOG ( GAMMA ( X ) ) ) / dX
+//
+//  Licensing:
+//
+//    This code is distributed under the GNU LGPL license. 
+//
+//  Modified:
+//
+//    18 January 2008
+//
+//  Author:
+//
+//    Original FORTRAN77 version by Jose Bernardo.
+//    C++ version by John Burkardt.
+//
+//  Reference:
+//
+//    Jose Bernardo,
+//    Algorithm AS 103:
+//    Psi ( Digamma ) Function,
+//    Applied Statistics,
+//    Volume 25, Number 3, 1976, pages 315-317.
+//
+//  Parameters:
+//
+//    Input, double X, the argument of the digamma function.
+//    0 < X.
+//
+//    Output, int *IFAULT, error flag.
+//    0, no error.
+//    1, X <= 0.
+//
+//    Output, double DIGAMA, the value of the digamma function at X.
+//
+{
+  double c = 8.5;             // switch to the asymptotic expansion once y >= c
+  double d1 = -0.5772156649;  // digamma(1) = -Euler-Mascheroni constant (truncated)
+  double r;
+  double s = 0.00001;         // below this, use the small-argument approximation
+  double s3 = 0.08333333333;  // 1/12  : Stirling-series coefficient
+  double s4 = 0.0083333333333; // 1/120 : Stirling-series coefficient
+  double s5 = 0.003968253968; // 1/252 : Stirling-series coefficient
+  double value;
+  double y;
+//
+//  Check the input.
+//
+  if ( x <= 0.0 )
+  {
+    value = 0.0;
+    *ifault = 1;
+    return value;
+  }
+//
+//  Initialize.
+//
+  *ifault = 0;
+  y = x;
+  value = 0.0;
+//
+//  Use approximation if argument <= S.
+//
+  if ( y <= s )
+  {
+    value = d1 - 1.0 / y;
+    return value;
+  }
+//
+//  Reduce to DIGAMA(X + N) where (X + N) >= C,
+//  using digamma(x) = digamma(x+1) - 1/x.
+//
+  while ( y < c )
+  {
+    value = value - 1.0 / y;
+    y = y + 1.0;
+  }
+//
+//  Use Stirling's (actually de Moivre's) expansion if argument > C.
+//
+  r = 1.0 / y;
+  value = value + log ( y ) - 0.5 * r;
+  r = r * r;
+  value = value - r * ( s3 - r * ( s4 - r * s5 ) );
+
+  return value;
+}
+//****************************************************************************80
diff --git a/biocUpdate.sh b/biocUpdate.sh
new file mode 100755
index 0000000..92b74b6
--- /dev/null
+++ b/biocUpdate.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Script that automizes copying into Bioconductor repository.
+# THE ASSUMPTION IS that there were no updates made to Bioc sources, thus can be just replaced by sources from C++ version.
+
+
+if [ $# -ne 1 ]
+then
+   echo "Usage: provide path to bioc sources directory (.../devel/src)"
+   echo "  biocUpdate.sh [dirPath]"
+   exit
+fi
+
+for i in `ls $1`
+do
+   if [  -e $i  -a  -f $i  ]
+   then
+      other="${1}/${i}"
+      if ! diff -q $i $other > /dev/null
+      then
+         cp -v $i $other;
+      fi
+   fi
+done
diff --git a/changeList b/changeList
new file mode 100644
index 0000000..38e2250
--- /dev/null
+++ b/changeList
@@ -0,0 +1,86 @@
+0.7.5
+Improvements:
+- parseAlignment adding option to mateNamesDiffer [!! add note that this can't be used with mixed alignments !!]
+
+Bug fixes:
+- estimateExpression didn't use more cores when number of chains was changed in parameters file.
+
+0.7.4
+[0.7.3] (bug in copying alignment)
+[0.7.2]
+[0.7.1]:
+Improvements:
+- parseAlignment can use *.m_alphas files generated by estimateVBExpression for expression estimates
+- estimateHyperPar forces smoothing of hyperparameters by default
+- enable excluding singletons (half alignments of paired reads) in parseAlignment[0.7.3]
+- .prob file contains number of transcripts so estimateExpression does not need .tr file for correct transcript count[0.7.3]
+
+Bug fixes:
+- fixing problems related to parsing FASTA reads
+  (and correctly assigning additional mismatches)
+- fixed problem with half alignments being omitted from read distribution estimation[0.7.2]
+
+0.7.0:
+Improvements:
+- estimateVBExpression provides new expression inference method using fast collapsed VB
+- adding --unstranded flag to parseAlignment, to allow read pairs with various directions to be used
+- changing computation of Rhat to produce better estimates
+
+Bug fixes:
+- adding -lpthread option for parseAlignment compilation, so that samtools links even without -fopenmp
+
+[0.6.1]
+0.6.0:
+Improvements:
+- thetaMeans contains variance of theta
+- proper handling of bad alignments
+- major speed improvement for parseAlignment on multiple CPUs
+- getGeneExpression and getWithinGene expression can use 'external' TR->GE mapping or gene list
+- getWithinGene changed so that transcripts keep order unless --groupByGene
+- getGeneExpression produces gene info file with list of genes
+- increased output precision in gGE and gWGE
+
+Bug fixes:
+- fixed major bug in getGeneExpression and getWithinGeneExpression
+- fixed occasional underflow in effective length computation
+
+0.5.3:
+Improvements:
+- parseAlignment should be compatible with bowtie 2 output + new flag (-l/--limit) to limit maximum number of alignments
+- parseAlignment tries to determine input type based on extension (no need to use --format parameter in most cases)
+- more helpful error messages in get[Within]GeneExpression programs
+
+Bug fixes:
+- fixed broken extractTranscriptInfo.py and added new 'type' that looks for "gene=<gene name>"
+
+Internals:
+- replacing Boost headers with the latest stable version 1.53.0
+- replacing samtools API with the latest stable version 0.1.19
+- parseAlignment saves probabilities in log scale
+
+0.5.0:
+Improvements:
+- parse alignment can extract and save gene names from Ensembl type reference
+- estimateDE now produces comparison for all condition pairs when used with more than 2 conditions
+- added --seed option to estimateHyperPar and estimateDE
+- estimateHyperPar works in log scale only (it accepts only mean logged expression in --meanFile; and logs expression samples automatically when needed)
+
+Bug fixes:
+- fixed output of estimateDE: fold change with confidence intervals are in log2 scale, mean expression in natural log scale
+   + changed flag confidencePerc to confidenceInterval and default value to 95
+- changed getPPLR so that it produces PPLR comparison in the same "direction" as estimateDE
+- fixed problem with long lines in reference sequence file in parseAlignment
+
+Deprecation:
+- drop obsolete parseAlignment.py
+
+0.4.3:
+- changed order in estimateDE output (logFC ConfidenceLow ConfidenceHigh)
+   -> rename to credible intervals ?
+- added normalization for getV. estimateDE estimateHP
+- estimateE
+   - new format in thetaMeans
+   - set seed
+   - doMax
+- drop example.html
+- bugfix for parseAlignment
diff --git a/checkTR.py b/checkTR.py
new file mode 100755
index 0000000..88d1df5
--- /dev/null
+++ b/checkTR.py
@@ -0,0 +1,76 @@
+#!/usr/bin/python
+
+"""
+Due to an error in our code, GENE EXPRESSION and WITHIN GENE EXPRESSION results
+produced with BitSeq versions up to 0.5.3 might be wrong for some genes.
+
+This only applies to SOME genes (or transcripts of those genes) IF the transcripts
+in the .tr file were not grouped by gene.
+This program can check exactly which genes have wrong expression estimates.
+
+The results of other genes and their transcripts are correct.
+"""
+
+import sys
+
+def checkFile(fileName):
+   """Report genes whose transcripts are not contiguous in a .tr file.
+
+   Each non-comment line is expected to start with 'geneName transcriptName'.
+   Genes whose transcript lines are interleaved with other genes are the ones
+   whose gene-level results were wrong in BitSeq versions up to 0.5.3.
+   """
+   try:
+      inf = open(fileName);
+   except:
+      sys.exit("Can't open file "+str(fileName));
+   genesSeen = set();
+
+   giName = ""       # gene that started the current contiguous group
+   prevName = "";    # gene name set when a gene is first encountered
+
+   genesWrong = list();   # genes detected as interleaved
+   g2ts ={};              # gene name -> list of its transcripts
+
+   for line in inf:
+      if line[0]=='#':continue;
+      try:
+         gn = line.split()[0];
+         tr = line.split()[1];
+      except:
+         sys.exit("The TR does not seem to have valid format in line:\n"+line);
+
+      if gn in g2ts:
+         g2ts[gn].append(tr);
+      else:
+         g2ts[gn] = [tr];
+         
+      if gn == prevName:
+         # Same gene as the one that last started a group; if the current
+         # group belongs to a different gene, the two are interleaved.
+         if prevName != giName:
+            if giName not in genesWrong: genesWrong.append(giName);
+            if prevName not in genesWrong: genesWrong.append(prevName);
+      else:
+         if gn not in genesSeen:
+            # First occurrence of this gene: it starts a new group.
+            prevName=gn;
+            giName=gn;
+            genesSeen.add(gn);
+         else:
+            # Gene reappears after other genes; it becomes the current group.
+            # NOTE(review): a reappearance on the file's last line(s) seems
+            # not to be flagged until a later line triggers the check above —
+            # confirm whether that case can occur in practice.
+            giName=gn;
+   if len(genesWrong) == 0:
+      print("Everything seems to be fine.")
+   else:
+      print("These "+str(len(genesWrong))+" (out of "+str(len(genesSeen))+") have wrong GENE EXPRESSION results:")
+      trCount = 0;
+      genesStr = "";
+      for it in genesWrong:
+         genesStr += it+" ";
+         trCount+=len(g2ts[it]);
+      print(genesStr);
+      print("These "+str(trCount)+" transcripts have wrong WITHIN GENE EXPRESSION results:");
+      trsStr = "";
+      for it in genesWrong:
+         for trit in g2ts[it]:
+            trsStr += trit+" ";
+      print(trsStr)
+
+if __name__ == "__main__":
+   if len(sys.argv) <2:
+      sys.exit("Please provide file name as argument.");
+   print("Checking file "+sys.argv[1]);
+   checkFile(sys.argv[1]);
+
+
diff --git a/common.cpp b/common.cpp
new file mode 100644
index 0000000..4685b13
--- /dev/null
+++ b/common.cpp
@@ -0,0 +1,22 @@
+#include <cstdlib>
+#include <string>
+
+#include "common.h"
+
+using namespace std;
+
+// Prints the program name, version and compile date/time via message().
+// Suppressed entirely in the Bioconductor build.
+void buildTime(char *argv0, string compileDate, string compileTime, const char* version){
+#ifdef BIOC_BUILD
+   return ; // don't want to print compile information
+#endif
+   message("### %s (version: %s) build: %s %s\n",argv0, version, compileDate.c_str(),compileTime.c_str());
+}
+
+// Prints "# <cur> done." every outOf/steps items (default: every 10%).
+// Returns true when a message was printed, false otherwise.
+bool progressLog(long cur,long outOf, long steps, char nl) {
+   if(outOf <= steps) return false;          // too few items to report on
+   long stride = outOf / steps;
+   if(cur == 0) return false;                // never report at the start
+   if(cur % stride != 0) return false;       // not at a reporting point
+   message("# %ld done.%c",cur,nl);
+   return true;
+}
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..c8e3b34
--- /dev/null
+++ b/common.h
@@ -0,0 +1,41 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+#include<string>
+
+using std::string;
+
+// Version string supplied at compile time via -DBS_VERSION (see Makefile).
+const char bitseq_version[] = BS_VERSION;
+
+#ifdef BIOC_BUILD
+// Bioconductor build: route output through R's printing and honour user interrupts.
+
+#include <R.h>
+#include <R_ext/Utils.h>
+
+#define R_INTERUPT R_CheckUserInterrupt()
+
+#define message(...) Rprintf(__VA_ARGS__)
+#define messageF(...) Rprintf(__VA_ARGS__)
+#define messageFlush()
+// NOTE(review): warning()/error() are not defined in this branch;
+// presumably R.h provides them — confirm.
+
+const long samplesAtOnce = 50;
+
+#else
+// Stand-alone build: plain stdio; interrupt check is a no-op.
+
+#include<cstdio>
+
+#define R_INTERUPT
+
+#define message(...) printf(__VA_ARGS__)
+#define messageF(...) {printf(__VA_ARGS__);fflush(stdout);}
+#define messageFlush() fflush(stdout)
+#define warning(...) {fprintf(stderr,"WARNING: ");fprintf(stderr, __VA_ARGS__);}
+#define error(...) {fprintf(stderr,"ERROR: ");fprintf(stderr, __VA_ARGS__);}
+
+#endif
+
+// Prints the build/version banner (no-op in Bioconductor build).
+void buildTime(char *argv0, string compileDate, string compileTime, const char *version = bitseq_version);
+
+// Prints progress roughly every outOf/steps items; returns true when printed.
+bool progressLog(long cur,long outOf, long steps = 10, char nl = '\n');
+
+#endif
diff --git a/convertSamples.cpp b/convertSamples.cpp
new file mode 100644
index 0000000..edd3cae
--- /dev/null
+++ b/convertSamples.cpp
@@ -0,0 +1,197 @@
+#include <cmath>
+#include <iomanip>
+#include <fstream>
+
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "common.h"
+#include "FileHeader.h"
+#include "misc.h"
+#include "TranscriptInfo.h"
+
+namespace ns_convertS {
+// Conversion helpers used by main(); each takes
+// (sample, normalisation constant, transcript effective length).
+// rpkm -> counts: scale by norm (1e-9*Nmap) and length.
+double r2c(double sample, double norm, double len){
+   return sample * norm * len;
+}
+// counts -> rpkm: inverse of r2c.
+double c2r(double sample, double norm, double len){
+   return sample / norm / len;
+}
+// theta -> log-rpkm: log of length-normalised sample plus log-scale constant.
+double t2rl(double sample, double Lnorm, double len){
+   return log(sample / len) + Lnorm;
+}
+// plain normalisation; len unused.
+double norm(double sample, double norm, double len = 1){
+   return sample * norm;
+}
+// log + normalisation (Lnorm is already in log scale); len unused.
+double logNorm(double sample, double Lnorm, double len = 1){
+   return log(sample) + Lnorm;
+}
+}
+
+// Reads MCMC samples from [sampleFile], applies the selected conversion or
+// normalisation per value, and writes the result (with a fresh header) to
+// the output file.
+int main(int argc,char* argv[]){
+   string programDescription=
+"Converts or normalizes MCMC expression samples.\n\
+   [sampleFile] should contain transposed MCMC samples.";
+   // Set options {{{
+   ArgumentParser args(programDescription,"[sampleFile]",1);
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   string actionDesc =
+"Action to perform options:\n\
+      T2R - theta to rpkm\n\
+      R2T - rpkm to theta\n\
+      T2RL - theta to log-rpkm\n\
+      C2R - counts to rpkm\n\
+      R2C - rpkm 2 counts\n\
+      NORM - normalize (samples are multiplied by Nmap)\n\
+      LOGNORM - log+normalize (samples are multiplied by Nmap and logged).";
+   args.addOptionS("a","action","action",1,actionDesc);
+   args.addOptionD("","Nmap","Nmap",0,"Total number of aligned reads. Or a normalization constant, when normalizing.");
+   args.addOptionS("t","trInfoFile","trInfoFileName",0,"File containing transcript information.");
+   if(!args.parse(argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   string action = args.getS("action");
+   if(! ((action=="T2R")||(action=="T2RL")||(action=="R2T")||(action=="C2R")||
+         (action=="R2C")||(action=="NORM")||(action=="LOGNORM"))){
+      error("Main: Unknown action: %s.\n",action.c_str());
+      return 1;
+   }
+
+   // }}}
+   
+   // M - number of transcripts, N - number of samples, Nmap - normalisation constant.
+   long M=0,i,j,m,N;
+   double Nmap=0;
+   // Check Nmap //{{{
+   if(args.isSet("Nmap")){
+      Nmap=args.getD("Nmap");
+      if((action=="T2R")||(action=="T2RL")||(action=="R2T")){
+         warning("Main: Using %lf as normalization constant, are you sure about this?\n",Nmap);
+      }
+   }else{
+      if((action=="C2R")||(action=="R2C")){
+         error("Main: Need Nmap (total number of mapped reads) for converting from/to counts.\n");
+         return 1;
+      }
+      if((action=="NORM")||(action=="LOGNORM")){
+         error("Main: Need Nmap (normalization constant) for normalization.\n");
+         return 1;
+      }
+   }
+   //}}}
+   // T2R is just C2R with Nmap = 1. //{{{
+   if(action=="T2R"){
+      action="C2R";
+      if(!args.isSet("Nmap"))Nmap = 1;
+   }
+   if(action=="R2T"){
+      action="R2C";
+      if(!args.isSet("Nmap"))Nmap = 1;
+   }
+   //}}}
+   bool trans;
+   ifstream inFile;
+   FileHeader fh;
+   string geName,trName;
+   TranscriptInfo trInfo;
+
+   // Load TR file if necessary {{{
+   // (length-dependent conversions need per-transcript effective lengths)
+   if(!((action=="NORM")||(action=="LOGNORM"))){
+      if((! args.isSet("trInfoFileName")) || (! trInfo.readInfo(args.getS("trInfoFileName")))){
+         error("Main: Transcript info file read failed. Please provide valid file with --trInfoFile option.\n");
+         return 1;
+      }
+      M=trInfo.getM();
+   } //}}}
+
+   ofstream outFile;
+   if(!ns_misc::openOutput(args,&outFile))return 1;
+
+   inFile.open(args.args()[0].c_str());
+   fh.setFile(&inFile);
+   if(!fh.samplesHeader(&N,&m,&trans)){//{{{
+      error("Main: Unable to open samples file.\n");
+      return 1;
+/*   }else if((trans)&&(! ((action=="--RPKMtoCOVERAGE")||(action=="-R2C")) )){
+      error("File should not be transposed");
+      return 0;*/ //}}}
+   }else if((m==0)||((M!=0)&&(M!=m))){ //{{{
+      // Transcript count in the samples file must match the .tr file (if loaded).
+      error("Main: Wrong number of transcripts %ld vs %ld.\n",M,m);
+      return 1;
+   }//}}}
+   M=m;
+   // Write output header: source file, log flag, layout, dimensions.
+   outFile<<"# "<<args.args()[0];
+   if((action=="LOGNORM")||(action=="T2RL"))outFile<<"\n# L ";
+   if(trans) outFile<<"\n# T (M rows,N cols)";
+   else outFile<<"\n# (N rows,M cols)";
+   outFile<<"\n# M "<<M<<"\n# N "<<N<<endl;
+   outFile.precision(9);
+   outFile<<scientific;
+
+   // Pick the per-value conversion function and its normalisation constant.
+   double sample;
+   double (*comp)(double a, double b, double c)=NULL;
+   double normC=1;
+   if(action=="R2C"){
+      normC = 1e-9*Nmap;
+      comp = &ns_convertS::r2c;
+   } else if(action=="C2R"){
+      normC = 1e-9*Nmap;
+      comp = &ns_convertS::c2r;
+   } else if(action=="T2RL"){
+      if(args.isSet("Nmap")) normC = log(Nmap * 1e9);
+      else normC = log(1e9);
+      comp = &ns_convertS::t2rl;
+   } else if(action=="NORM"){
+      normC = Nmap;
+      comp = &ns_convertS::norm;
+   } else if(action=="LOGNORM"){
+      normC = log(Nmap);
+      comp = &ns_convertS::logNorm;
+   } else {
+      error("Something went wrong.\n");
+      return 1;
+   }
+   // Stream the samples through comp(); the two branches differ only in
+   // whether the transcript's effective length is passed or a dummy 1.
+   if(!((action=="NORM")||(action=="LOGNORM"))){
+      if(trans){
+         // Transposed layout: one row per transcript.
+         for(j=0;j<M;j++){
+            for(i=0;i<N-1;i++){
+               inFile>>sample;
+               outFile<<comp(sample,normC,trInfo.effL(j))<<" ";
+            }
+            inFile>>sample;
+            outFile<<comp(sample,normC,trInfo.effL(j))<<endl;
+         }
+      }else{
+         // One row per sample.
+         for(i=0;i<N;i++){
+            for(j=0;j<M-1;j++){
+               inFile>>sample;
+               outFile<<comp(sample,normC,trInfo.effL(j))<<" ";
+            }
+            inFile>>sample;
+            outFile<<comp(sample,normC,trInfo.effL(j))<<endl;
+         }
+      }
+   }else{
+      if(trans){
+         for(j=0;j<M;j++){
+            for(i=0;i<N-1;i++){
+               inFile>>sample;
+               outFile<<comp(sample,normC,1)<<" ";
+            }
+            inFile>>sample;
+            outFile<<comp(sample,normC,1)<<endl;
+         }
+      }else{
+         for(i=0;i<N;i++){
+            for(j=0;j<M-1;j++){
+               inFile>>sample;
+               outFile<<comp(sample,normC,1)<<" ";
+            }
+            inFile>>sample;
+            outFile<<comp(sample,normC,1)<<endl;
+         }
+      }
+   }
+   inFile.close();
+   outFile.close();
+   if(args.verbose)message("Done.\n");
+   return 0;
+}
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index f004316..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,14 +0,0 @@
-bitseq (0.7.0+dfsg-1) UNRELEASED; urgency=medium
-
-  * Initial upload to Debian
-
- -- Andreas Tille <tille at debian.org>  Wed, 01 Jan 2014 19:04:47 +0100
-
-bitseq (0.4.3-0ubuntu2) precise; urgency=low
-
-  * Initial release.
-  * Build against stock libbam (only static just now)
-  * Build against stock libboost
-  * For now, leave the names of the binaries as-is
-
- -- Tim Booth <tbooth at ceh.ac.uk>  Mon, 11 Feb 2013 09:59:05 +0000
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 93524dc..0000000
--- a/debian/control
+++ /dev/null
@@ -1,30 +0,0 @@
-Source: bitseq
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Tim Booth <tbooth at ceh.ac.uk>,
-           Andreas Tille <tille at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 9),
-               zlib1g-dev,
-               libbam-dev,
-               libboost-dev,
-               help2man
-Standards-Version: 3.9.5
-Vcs-Browser: http://anonscm.debian.org/viewvc/debian-med/trunk/packages/bitseq/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/bitseq/trunk/
-Homepage: https://github.com/BitSeq/BitSeq
-
-Package: bitseq
-Architecture: any
-Depends: ${shlibs:Depends},
-         ${misc:Depends},
-         python
-Suggests: samtools
-Description: Bayesian Inference of Transcripts from Sequencing Data
- BitSeq is an application for inferring expression levels of individual
- transcripts from sequencing (RNA-Seq) data and estimating differential
- expression (DE) between conditions. An advantage of this approach is the
- ability to account for both technical uncertainty and intrinsic biological
- variance in order to avoid false DE calls. The technical contribution to the
- uncertainty comes both from finite read-depth and the possibly ambiguous
- mapping of reads to multiple transcripts.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 39c351f..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,22 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: BitSeq
-Upstream-Contact: Peter Glaus <glaus at cs.man.ac.uk>
-Source: https://github.com/BitSeq/BitSeq/releases
-Files-Excluded: boost/ samtools/
-
-Files: *
-Copyright: © 2013 Peter Glaus
-License: Artistic-2
-
-Files: debian/*
-Copyright: © 2013 Tim Booth <tbooth at ceh.ac.uk>
-           © 2013 Andreas Tille <tille at debian.org>
-License: Artistic-2
-
-License: Artistic-2
- This program is free software; you can redistribute it and/or modify
- it under the terms of the Artistic License, which comes with Perl.
- .
- On Debian systems, the complete text of the Artistic License can be
- found in `/usr/share/common-licenses/Artistic'.
-
diff --git a/debian/patches/hardening.patch b/debian/patches/hardening.patch
deleted file mode 100644
index cf2d122..0000000
--- a/debian/patches/hardening.patch
+++ /dev/null
@@ -1,17 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Change: Wed, 01 Jan 2014 19:04:47 +0100
-Description: Propagate hardening options
-
---- BitSeq-0.7.0.orig/Makefile
-+++ BitSeq-0.7.0/Makefile
-@@ -6,8 +6,8 @@
- 
- 
- COFLAGS = $(ARCH) -O2 -pipe
--CXXFLAGS = -DBS_VERSION=\"$(VERSION)\" -Wall $(COFLAGS)
--LDFLAGS = -Wl,-gc-sections
-+CXXFLAGS += -DBS_VERSION=\"$(VERSION)\" -Wall $(COFLAGS)
-+LDFLAGS += -Wl,-gc-sections
- BOOSTFLAGS = -I .
- OPENMP = -fopenmp -DSUPPORT_OPENMP
- 
diff --git a/debian/patches/link_against_system_samtools.patch b/debian/patches/link_against_system_samtools.patch
deleted file mode 100644
index 5c6d282..0000000
--- a/debian/patches/link_against_system_samtools.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-Author: Tim Booth <tbooth at ceh.ac.uk>
-Last-Update: Wed, 01 Jan 2014 19:04:47 +0100 (by Andreas Tille)
-Description: link against Debian packaged samtools
-
---- BitSeq-0.7.0.orig/ReadDistribution.h
-+++ BitSeq-0.7.0/ReadDistribution.h
-@@ -21,8 +21,8 @@
- 
- #else
- 
--#include "bam.h"
--#include "sam.h"
-+#include <samtools/bam.h>
-+#include <samtools/sam.h>
- 
- //#define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t)))
- /*
---- BitSeq-0.7.0.orig/Makefile
-+++ BitSeq-0.7.0/Makefile
-@@ -63,8 +63,8 @@
- getWithinGeneExpression: getWithinGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o
- 	$(CXX) $(CXXFLAGS) $(LDFLAGS) getWithinGeneExpression.cpp $(COMMON_DEPS) PosteriorSamples.o TranscriptInfo.o -o getWithinGeneExpression
- 
--parseAlignment: parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o samtools/sam.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o
--	$(CXX) $(CXXFLAGS) $(OPENMP) $(LDFLAGS) -pthread -Isamtools parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o samtools/*.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o -lz -o parseAlignment
-+parseAlignment: parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o
-+	$(CXX) $(CXXFLAGS) $(OPENMP) $(LDFLAGS) -pthread parseAlignment.cpp $(COMMON_DEPS) ReadDistribution.o TranscriptExpression.o TranscriptInfo.o TranscriptSequence.o -lz -lbam -o parseAlignment
- 
- transposeLargeFile: transposeLargeFile.cpp $(COMMON_DEPS) transposeFiles.o
- 	$(CXX) $(CXXFLAGS) $(LDFLAGS) transposeLargeFile.cpp $(COMMON_DEPS) transposeFiles.o -o transposeLargeFile
-@@ -92,7 +92,7 @@
- 	$(CXX) $(CXXFLAGS) -ffunction-sections -fdata-sections -c PosteriorSamples.cpp
- 
- ReadDistribution.o: ReadDistribution.cpp ReadDistribution.h TranscriptExpression.h TranscriptInfo.h TranscriptSequence.h 
--	$(CXX) $(CXXFLAGS) $(OPENMP) -Isamtools -c ReadDistribution.cpp
-+	$(CXX) $(CXXFLAGS) $(OPENMP) -c ReadDistribution.cpp
- 
- Sampler.o: Sampler.cpp Sampler.h GibbsParameters.h
- 	$(CXX) $(CXXFLAGS) $(BOOSTFLAGS) -c Sampler.cpp
-@@ -116,10 +116,7 @@
- asa103/asa103.o:
- 	make --directory asa103
- 
--samtools/sam.o:
--	make --directory samtools
--
- # CLEAN:
- clean:
--	rm asa103/*.o samtools/*.o *.o $(PROGRAMS)
-+	rm -f asa103/*.o samtools/*.o *.o $(PROGRAMS)
- 
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index 7d71a4c..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1,2 +0,0 @@
-link_against_system_samtools.patch
-hardening.patch
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 09fc5bb..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/make -f
-
-# Uncomment this to turn on verbose mode.
-export DH_VERBOSE=1
-
-pkg := $(shell dpkg-parsechangelog | sed -n 's/^Source: //p')
-version=$(shell dpkg-parsechangelog -ldebian/changelog | grep Version: | cut -f2 -d' ' | cut -f1 -d- )
-mandir=$(CURDIR)/debian/$(pkg)/usr/share/man/man1/
-bindir=$(CURDIR)/debian/$(pkg)/usr/bin/
-
-%:
-	dh $@
-
-override_dh_installman:
-        # try to create man pages wherever possible
-	mkdir -p $(mandir)
-	help2man --no-info --name='convert or normalize MCMC expression samples' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/convertSamples > $(mandir)/convertSamples.1
-	help2man --no-info --name='this bitseq module estimates differential expression from data sets' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/estimateDE > $(mandir)/estimateDE.1
-	help2man --no-info --name='estimate expression given precomputed probabilities of (observed) reads'' alignments' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/estimateExpression > $(mandir)/estimateExpression.1
-	help2man --no-info --name='estimate expression dependent hyperparameters for bitseq' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/estimateHyperPar > $(mandir)/estimateHyperPar.1
-	help2man --no-info --name='estimate expression given precomputed probabilities of (observed) reads'' alignments' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/estimateVBExpression > $(mandir)/estimateVBExpression.1
-	help2man --no-info --name='extract MCMC samples of selected transcripts' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/extractSamples > $(mandir)/extractSamples.1
-	help2man --no-info --name='computes log_2 Fold Change from MCMC expression samples' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/getFoldChange > $(mandir)/getFoldChange.1
-	help2man --no-info --name='compute expression of whole genes' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/getGeneExpression > $(mandir)/getGeneExpression.1
-	help2man --no-info --name='compute PPLR from MCMC expression samples' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/getPPLR > $(mandir)/getPPLR.1
-	help2man --no-info --name='estimates variance of MCMC samples' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/getVariance > $(mandir)/getVariance.1
-	help2man --no-info --name='compute relative expression of transcripts within genes' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/getWithinGeneExpression > $(mandir)/getWithinGeneExpression.1
-	help2man --no-info --name='pre\-compute probabilities of (observed) reads'' alignments' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/parseAlignment > $(mandir)/parseAlignment.1
-	help2man --no-info --name='helper for bitseq to transpose files lines and columns' \
-		--version-string="$(version)" --no-discard-stderr \
-		$(bindir)/transposeLargeFile > $(mandir)/transposeLargeFile.1
-
-override_dh_auto_install:
-	find -maxdepth 1 -type f -perm /111 -exec dh_install '{}' /usr/bin ';'
-
-get-orig-source:
-	mkdir -p ../tarballs
-	uscan --verbose --force-download --repack-compression xz --destdir=../tarballs
-
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index f22b3bb..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,4 +0,0 @@
-version=4
-
-opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
- https://github.com/BitSeq/BitSeq/releases .*/archive/v at ANY_VERSION@@ARCHIVE_EXT@
diff --git a/estimateDE.cpp b/estimateDE.cpp
new file mode 100644
index 0000000..d81be6b
--- /dev/null
+++ b/estimateDE.cpp
@@ -0,0 +1,326 @@
+/*
+ * Original model applying the DE model to individual sets of samples independently.
+ * One set of samples == 1 sample from each replicate of each condition.
+ */
+#include<algorithm>
+#include<cmath>
+#include<fstream>
+#include<sstream>
+#include "boost/random/gamma_distribution.hpp"
+#include "boost/random/mersenne_twister.hpp"
+#include "boost/random/normal_distribution.hpp"
+
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "misc.h"
+#include "MyTimer.h"
+#include "PosteriorSamples.h"
+
+#include "common.h"
+
+//#define PERCENT 0.9
+
+#define LAMBDA_0 2.0
+
+using ns_params::paramT;
+
+// Forward declarations of helpers used by estimateDE().
+namespace ns_estimateDE {
+
+// Open and write headers into appropriate output files.
+// The size of outFiles[] should be C+1 (one per condition plus one for variances).
+// Returns true if everything went OK.
+bool initializeOutputFile(long C, long M, long N, const ArgumentParser &args, ofstream *outF, ofstream outFiles[]);
+// For a given mean expression expr, copies into par the (alpha, beta)
+// hyperparameters that were estimated for the closest expression value.
+void getParams(double expr,const vector<paramT> &params, paramT *par);
+// Read transcript m into tr and prepare mu_0 and mu_00; cond does not really
+// change (it only advances its sample-file readers).
+void readNextTranscript(long m, long C, long N, Conditions *cond, const vector<paramT> &params, vector<vector<vector<double> > > *tr, vector<paramT> *curParams, double *mu_00);
+
+}
+
+// Entry point: estimates differential expression between conditions.
+// For each transcript it Gibbs-samples per-condition mean expression from a
+// Normal-Gamma model (alpha/beta hyperparameters looked up by mean
+// expression), then writes PPLR, log2 fold change with confidence intervals
+// and logged condition means; with --samples also the raw samples.
+// Returns 0 on success (or after printing usage), 1 on initialization failure.
+extern "C" int estimateDE(int *argc,char* argv[]){
+string programDescription =
+"Estimate differential expression from the dataset.\n\
+   [sampleFiles] should contain transposed MCMC samples from replicates.\n\
+   To distinguish conditions use C between them e.g.:\n\
+      samplesC1-R1.rpkm samplesC1-R2.rpkm C samplesC2-R1.rpkm samplesC2-R2.rpkm";
+   // Intro: {{{
+   ArgumentParser args(programDescription,"[sampleFiles]",1);
+   args.addOptionS("o","outPrefix","outFilePrefix",1,"Prefix for the output files.");
+   args.addOptionS("p","parameters","parFileName",1,"File containing estimated hyperparameters.");
+   args.addOptionB("s","samples","samples",0,"Produce samples of condition mean expression apart from PPLR and confidence.");
+   args.addOptionD("l","lambda0","lambda0",0,"Parameter lambda_0.",LAMBDA_0);
+   args.addOptionD("","confidenceInterval","cf",0,"Percentage for confidence intervals.", 95);
+   args.addOptionS("","norm","normalization",0,"Normalization constants for each input file provided as comma separated list of doubles (e.g. 1.0017,1.0,0.9999 ).");
+   args.addOptionL("","seed","seed",0,"Random initialization seed.");
+   if(!args.parse(*argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   //}}}
+   /*
+    * N - number of samples in one replicate (the smallest number for replicates with different N_r)
+    * M - number of transcripts
+    * C - number of conditions
+    */
+   long C,M,N;
+   vector<paramT> params;
+   Conditions cond;
+   // Open file with hyper parameters and read those in.
+   if(!ns_params::readParams(args.getS("parFileName"), &params)) return 1;
+   if(args.verb())message("Parameters loaded.\n");
+   // Initialize sample files handled by object cond.
+   if(!ns_misc::readConditions(args, &C, &M, &N, &cond)) return 1;
+   // Initialize output files.
+   ofstream outF;
+   ofstream *outFiles = new ofstream[C+1];
+   // Use standard array as we don't want to bother with vector of pointers.
+   if(!ns_estimateDE::initializeOutputFile(C, M, N, args, &outF, outFiles)){
+      // BUGFIX: release the stream array before bailing out (was leaked).
+      delete [] outFiles;
+      return 1;
+   }
+
+   // variables {{{
+   vector<vector<vector<double> > > tr(C);
+   vector<paramT> curParams(C);
+   vector<vector<double> > samples(C,vector<double>(N));
+   vector<double> vars(N);
+   vector<double> mu_c(C);
+//   vector<vector<double> > mus(C,vector<double>(N,0));
+//   vector<double> vars(N);
+   long c,c2,m,n,r;
+   double prec,var,sum,sumSq,alpha,beta,betaPar,mu_00,normMu;
+   double lambda0 = args.getD("lambda0");
+   long RC;
+   MyTimer timer;
+   boost::random::mt11213b rng_mt(ns_misc::getSeed(args));
+   boost::random::gamma_distribution<long double> gammaDistribution;
+   typedef boost::random::gamma_distribution<long double>::param_type gDP;
+   boost::random::normal_distribution<long double> normalDistribution;
+   typedef boost::random::normal_distribution<long double>::param_type nDP;
+   double log2FC, pplr, ciLow, ciHigh;
+   vector<double> difs(N);
+   // }}}
+
+   if(args.verbose){ //{{{
+      timer.split();
+      message("Sampling condition mean expression.\n");
+   }//}}}
+   for(m=0;m<M;m++){
+      if(progressLog(m,M,10,' '))timer.split();
+      // Read into tr and assign hyperparameters into curParams, initialize mu_00.
+      // cond does not really change, just reads more data from file.
+      ns_estimateDE::readNextTranscript(m, C, N, &cond, params, &tr, &curParams, &mu_00);
+      // Zero "mean condition mean expression".
+      mu_c.assign(C,0);
+      // Sample condition mean expressions {{{
+      // Standard Normal-Gamma posterior update: precision ~ Gamma(alpha,beta),
+      // condition mean ~ Normal(normMu, var) given the sampled precision.
+      for(n=0;n<N;n++){
+         for(c=0;c<C;c++){
+            RC = cond.getRC(c);
+            alpha = curParams[c].alpha + RC / 2.0;
+            betaPar = lambda0*mu_00*mu_00;
+
+            sum=0;
+            sumSq=0;
+            for(r=0;r< RC;r++){
+               sum += tr[c][r][n];
+               sumSq += tr[c][r][n]*tr[c][r][n];
+            }
+            betaPar += sumSq - (lambda0*mu_00 + sum)*(lambda0*mu_00 + sum) /
+               (lambda0 + RC);
+            normMu= (lambda0*mu_00 + sum) / (lambda0 + RC);
+            beta = curParams[c].beta + betaPar / 2 ;
+            // Set parameters of gamma distribution.
+            gammaDistribution.param(gDP(alpha, 1.0/beta));
+            // Sample precision.
+            prec = gammaDistribution(rng_mt);
+            // Variance, the precision is scaled by (lambda0+RC).
+            var = 1/(prec *(lambda0 + RC));
+            vars[n] = var;
+
+            // Set parameter for normal distribution.
+            normalDistribution.param(nDP(normMu, sqrt(var)));
+            // Sample condition mean.
+            samples[c][n] = normalDistribution(rng_mt);
+            mu_c[c] += samples[c][n];
+         }
+         R_INTERUPT;
+      }
+      // }}}
+      // Compute condition mean for each condition.
+      for(c=0;c<C;c++) mu_c[c] /= N;
+      // Calculate and write pplr for each pair of conditions. {{{
+      for(c=0;c<C;c++){
+         for(c2=c+1;c2<C;c2++){
+            pplr = 0;
+            for(n=0;n<N;n++)
+               if(samples[c2][n] > samples[c][n])pplr+=1;
+            pplr/=N;
+            outF<<pplr<<" ";
+         }
+      }
+      // }}}
+      // Calculate log2FC; write log2FC and CIs for each pair of conditions. {{{
+      for(c=0;c<C;c++){
+         for(c2=c+1;c2<C;c2++){
+            for(n=0;n<N;n++)
+               difs[n] = samples[c2][n]-samples[c][n];
+            ns_misc::computeCI(args.getD("cf"), &difs, &ciLow, &ciHigh);
+            // Samples are natural-logged; divide by log(2) to report log2.
+            ciLow /= log(2);
+            ciHigh /= log(2);
+            log2FC = (mu_c[c2] - mu_c[c])/log(2);
+            outF<<log2FC<<" "<<ciLow<<" "<<ciHigh<<" ";
+         }
+      }
+      // }}}
+      // Write logged condition mean for each condition. No space before EOL. {{{
+      for(c = 0; c < C-1; c++)outF<<mu_c[c]<<" ";
+      outF<<mu_c[C-1]<<endl;
+      // }}}
+      // Write samples if necessary. {{{
+      if(args.flag("samples")){
+         for(c=0;c<C;c++){
+            for(n=0;n<N;n++)outFiles[c]<<samples[c][n]<<" ";
+            outFiles[c]<<endl;
+         }
+         // Save sampled variance as well.
+         for(n=0;n<N;n++) outFiles[C]<<vars[n]<<" ";
+         outFiles[C]<<endl;
+      }//}}}
+   }
+   // Close and exit {{{
+   if(args.flag("samples")){
+      for(c=0;c<C+1;c++)outFiles[c].close();
+   }
+   // BUGFIX: the array allocated with new[] above was never freed.
+   delete [] outFiles;
+   outF.close();
+   if(args.verbose)message("DONE\n");
+   // }}}
+   return 0;
+}
+
+#ifndef BIOC_BUILD
+// Standalone command-line entry point; simply forwards to estimateDE().
+int main(int argc, char* argv[]){
+   const int status = estimateDE(&argc, argv);
+   return status;
+}
+#endif
+
+namespace ns_estimateDE {
+
+// Opens all output files and writes their descriptive headers.
+// outFiles[] must have room for C+1 streams: C condition-sample files plus
+// one variance file; these are only opened when the --samples flag is set.
+// The PPLR file (*outF) is always opened.
+// Returns false (leaving already-opened streams open) if any file cannot be
+// created; returns true on success.
+bool initializeOutputFile(long C, long M, long N, const ArgumentParser &args, ofstream *outF, ofstream outFiles[]){//{{{
+   if(args.flag("samples")){
+      // If samples flag is set, then write condition mean expression samples into -C?.est files.
+      // Also write variance samples into samples file.
+      stringstream fnStream;
+      string fileName;
+      // Initialize samples files.
+      for(long c=0;c<C;c++){
+         fnStream.str("");
+         fnStream<<args.getS("outFilePrefix")<<"-C"<<c<<".est";
+         fileName = fnStream.str();
+         outFiles[c].open(fileName.c_str());
+         if(! outFiles[c].is_open()){
+            error("Unable to open output file %s\n",fileName.c_str());
+            return false;
+         }
+         // Write header for samples file.
+         outFiles[c]<<"# Inferred condition mean log expression.\n"
+                      "# condition "<<c+1
+                    <<"\n# ";
+         // List the input sample files this run was based on.
+         for(long i=0;i<(long)args.args().size();i++){
+            outFiles[c]<<args.args()[i]<<" ";
+         }
+         outFiles[c]<<"\n# lambda_0 "<<args.getD("lambda0")<<"\n# T (Mrows_Ncols) L (logged)\n# M "<<M<<"\n# N "<<N<<endl;
+      }
+      // Initialize file for variances.
+      string varFileName = args.getS("outFilePrefix")+".estVar";
+      outFiles[C].open(varFileName.c_str());
+      if(! outFiles[C].is_open()){
+         error("Unable to open output file %s\n",varFileName.c_str());
+         return false;
+      }
+      // Write header for variance file.
+      outFiles[C]<<"# Inferred variances in last condition.\n"
+                   "# lambda_0 "<<args.getD("lambda0")
+                 <<"\n# T \n# M "<<M<<"\n# N "<<N
+                 <<endl;
+   }
+   // Initialize PPLR file.
+   string outFileName = args.getS("outFilePrefix")+".pplr";
+   outF->open(outFileName.c_str());
+   if(! outF->is_open()){
+      error("Unable to open output file %s\n",outFileName.c_str());
+      return false;
+   }
+   // Write header for PPLR file.
+   *outF<<"# ";
+   for(long i=0;i<(long)args.args().size();i++){
+      *outF<<args.args()[i]<<" ";
+   }
+   *outF<<"\n# lambda_0 "<<args.getD("lambda0")<<"\n# T \n# M "<<M<<"\n# N "<<N<<"\n"
+        <<"# Conditions: C "<<C<<" Condition pairs("<<C*(C-1)/2<<"): ";
+   // Enumerate all ordered condition pairs (c < c2) used in the columns below.
+   for(long c=0;c<C;c++)
+      for(long c2=c+1;c2<C;c2++)
+      *outF<<c+1<<"~"<<c2+1<<" ";
+   *outF<<"\n# Columns contain PPLR for each pair of conditions, "
+          "log2 fold change with confidence intervals for each pair of conditions and "
+          "log mean condition mean expression for each condition.\n"
+          "# CPxPPLR CPx(log2FC ConfidenceLow ConfidenceHigh) "
+          "Cx(log mean condition mean expressions)"
+        <<endl;
+   return true;
+}//}}}
+
+// Copies into par the (alpha, beta) hyperparameters whose associated
+// expression value is nearest to expr. params must be sorted by expr.
+void getParams(double expr,const vector<paramT> &params, paramT *par){//{{{
+   long lo = 0, hi = (long)params.size()-1;
+   // Clamp to the boundary entries when expr lies outside the table.
+   if(expr <= params[lo].expr){
+      par->alpha = params[lo].alpha;
+      par->beta  = params[lo].beta;
+      return;
+   }
+   if(expr >= params[hi].expr){
+      par->alpha = params[hi].alpha;
+      par->beta  = params[hi].beta;
+      return;
+   }
+   // Binary search for the bracketing pair: params[lo].expr <= expr < params[hi].expr.
+   while(hi - lo > 1){
+      long mid = (lo + hi) / 2;
+      if(params[mid].expr <= expr) lo = mid;
+      else hi = mid;
+   }
+   // Pick whichever neighbour is closer (ties go to the upper one).
+   long best = (expr - params[lo].expr < params[hi].expr - expr) ? lo : hi;
+   par->alpha = params[best].alpha;
+   par->beta  = params[best].beta;
+}//}}}
+
+// Reads the N samples of transcript m for every replicate of every condition
+// into (*tr)[c][r], natural-logging them when the input files are not already
+// logged. For each condition the mean log expression mu_0 selects the nearest
+// hyperparameters into (*curParams)[c]; *mu_00 receives the mean of mu_0 over
+// all conditions that had data. cond only advances its file readers.
+void readNextTranscript(long m, long C, long N, Conditions *cond, const vector<paramT> &params, vector<vector<vector<double> > > *tr, vector<paramT> *curParams, double *mu_00){//{{{
+   double divT = 0, divC, mu_0;
+   long c,r,n,RC;
+   *mu_00 = 0;
+   for(c=0;c<C;c++){
+      mu_0=0;
+      divC=0;
+      RC = cond->getRC(c);
+      if((long)(*tr)[c].size() < RC){
+         (*tr)[c].resize( RC );
+      }
+      for(r=0;r<RC;r++){
+         // BUGFIX: the closing parenthesis was misplaced —
+         // "if(cond->getTranscript(c, r , m, (*tr)[c][r]), N)" evaluated the
+         // comma expression, whose value is always N (true), making the
+         // missing-transcript warning branch unreachable. N belongs inside
+         // the call as the number of samples to read.
+         if(cond->getTranscript(c, r, m, (*tr)[c][r], N)){
+            for(n=0;n<N;n++){
+               // Log the expression samples if the files don't have logged flag set.
+               if(!cond->logged())(*tr)[c][r][n] = ((*tr)[c][r][n] == 0)? ns_misc::LOG_ZERO : log ((*tr)[c][r][n] );
+               mu_0+=(*tr)[c][r][n];
+            }
+            divC+=1;
+         }else{
+            warning("Main: Condition %ld replicate %ld does not seem to have transcript %ld.\n",c,r,m);
+         }
+      }
+      R_INTERUPT;
+      if(divC>0){
+         mu_0 /= (divC * N); // take mean over all replicates
+         *mu_00+=mu_0;
+         divT++;
+      }
+      getParams(mu_0, params, &(*curParams)[c]);
+   }
+   // Guard against 0/0 when no condition produced any samples.
+   if(divT>0) *mu_00/=divT;
+}//}}}
+
+}
diff --git a/estimateExpression.cpp b/estimateExpression.cpp
new file mode 100644
index 0000000..489968f
--- /dev/null
+++ b/estimateExpression.cpp
@@ -0,0 +1,597 @@
+#include<algorithm>
+#include<ctime>
+#include<cmath>
+#ifdef _OPENMP
+#include<omp.h>
+#endif
+#include<sstream>
+
+#include "ArgumentParser.h"
+#include "CollapsedSampler.h"
+#include "FileHeader.h"
+#include "GibbsSampler.h"
+#include "misc.h"
+#include "MyTimer.h"
+#include "Sampler.h"
+#include "TagAlignments.h"
+#include "TranscriptInfo.h"
+#include "transposeFiles.h"
+
+#include "common.h"
+
+#define DEBUG(x)
+#define FF first
+#define SS second
+
+//#define LOG_NEED
+//#define LOG_RHAT
+// Module-level state shared by readData(), MCMC() and the driver below.
+TranscriptInfo trInfo;
+
+long  M;//, mAll; // M : number of transcripts (include transcript 0 ~ Noise)
+//long N, 
+long Nunmap; // N: number of read, un-mappable read, mappable reads
+
+// Per-chain sample file names produced during MCMC.
+vector<string> samplesFileNames;
+string failedMessage;
+
+// Forget the per-run list of sample file names (module-level state reset).
+void clearDataEE(){
+   samplesFileNames.resize(0);
+}
+
+// Reads the .prob alignment file named by the first positional argument and
+// returns a heap-allocated TagAlignments (caller owns it), or NULL on error.
+// Side effects: may raise the global M (transcript count) and sets the global
+// Nunmap from the header's total/mapped read counts.
+TagAlignments* readData(const ArgumentParser &args) {//{{{
+   long i,j,num,tid;
+   double prb;
+   long Ntotal=0,Nmap=0,probM=0;
+   string readId,strand,blank;
+   ifstream inFile;
+   MyTimer timer;
+   TagAlignments *alignments = new TagAlignments(false);
+
+   // Read alignment probabilities {{{
+   inFile.open(args.args()[0].c_str());
+   FileHeader fh(&inFile);
+   ns_fileHeader::AlignmentFileType format;
+   if((!fh.probHeader(&Nmap,&Ntotal,&probM,&format)) || (Nmap ==0)){//{{{
+      error("Prob file header read failed.\n");
+      // BUGFIX: free alignments before returning, as the later error path
+      // already does; this return used to leak it.
+      delete alignments;
+      return NULL;
+   }//}}}
+   // Use number of transcripts from prob file if it is higher.
+   if(probM>M)M = probM;
+   message("N mapped: %ld\n",Nmap);
+   messageF("N total:  %ld\n",Ntotal);
+   if(args.verb())message("Reading alignments.\n");
+   if(Ntotal>Nmap)Nunmap=Ntotal-Nmap;
+   else Nunmap=1; //no valid count file assume only one not aligned properly
+   alignments->init(Nmap,0,M);
+   long mod=10000;
+   long bad = 0;
+   timer.start();
+   for(i = 0; i < Nmap; i++) {
+      inFile>>readId>>num;
+      if(format==ns_fileHeader::OLD_FORMAT)inFile>>blank;
+      if(!inFile.good())break;
+     //    message("%s %ld\n",(readId).c_str(),num);
+      for(j = 0; j < num; j++) {
+         if(format == ns_fileHeader::OLD_FORMAT)inFile>>tid>>strand>>prb;
+         else inFile>>tid>>prb;
+         if(inFile.fail()){
+            inFile.clear();
+            // ignore other read's alignments
+            j=num;
+            // this read goes to noise assigning
+            tid=0;
+            // 10 means either 10 or exp(10), but should be still be large enough
+            prb=10;
+            bad++;
+         }
+         switch(format){
+            case ns_fileHeader::OLD_FORMAT:
+               if(tid!=0) prb /= trInfo.L(tid-1);
+               // intentional fall through: old format is stored the same way
+               // after normalizing by transcript length
+            case ns_fileHeader::NEW_FORMAT:
+               alignments->pushAlignment(tid, prb);
+               break;
+            case ns_fileHeader::LOG_FORMAT:
+               alignments->pushAlignmentL(tid, prb);
+         } 
+      }
+      // ignore rest of line
+      inFile.ignore(10000000,'\n');
+
+      alignments->pushRead();
+   
+      R_INTERUPT;
+      // Progress reporting with exponentially growing interval.
+      if(args.verb() && (i % mod == 0) && (i>0)){
+         message("  %ld ",i);
+         timer.split();
+         mod*=10;
+      }
+   }
+   if(bad>0)warning("Main: %ld reads' alignment information were corrupted.\n",bad);
+   inFile.close();
+   long Nhits,NreadsReal;
+   alignments->finalizeRead(&M, &NreadsReal, &Nhits);
+   // If the transcript info is initialized, check that the number of transcripts has not changed.
+   // The number can't be smaller as it starts off with trInfo->M
+   if((trInfo.isOK())&&(M > trInfo.getM() + 1)){
+      if(args.getS("outputType") == "rpkm"){
+         error("Main: Number of transcripts in .prob file is higher than in the .tr file (%ld %ld)!\n",M,trInfo.getM() + 1);
+         delete alignments;
+         return NULL;
+      }else{
+         warning("Main: Number of transcripts in .prob file is higher than in the .tr file (%ld %ld)!\n   This can cause problems later on!\n",M,trInfo.getM() + 1);
+      }
+   }
+   //}}}
+   if(i<Nmap)message("Read only %ld reads.\n",NreadsReal);
+   message("All alignments: %ld\n",Nhits);
+   messageF("Isoforms: %ld\n",M);
+   Nmap = NreadsReal;
+   return alignments;
+   /* {{{ remapping isoforms to ignore those without any hits
+   M = mAll;
+   M = isoformsHit;
+   isoformMap.assign(M);
+   for(i=0,j=0;i<mAll;i++)
+      if(readInIsoform[i]!=-1){
+         readInIsoform[i]=j;
+         isoformMap[j]=i;
+         j++;
+      }
+   for(i=0;i<Sof(alignments);i++){
+      alignments[i].setTrId( readInIsoform[ alignments[i].getTrId() ] );
+   }
+   }}}*/
+}//}}}
+
+void MCMC(TagAlignments *alignments,gibbsParameters &gPar,ArgumentParser &args){//{{{
+   // Declarations: {{{
+   DEBUG(message("Declarations:\n"));
+   long i,j,samplesHave=0,totalSamples=0,samplesN,chainsN,samplesSave,seed;
+   pairD rMean,tmpA,tmpV,sumNorms;
+   double rH1,rH2;
+   ofstream meansFile;
+   ofstream *samplesFile = new ofstream[gPar.chainsN()];
+   MyTimer timer;
+   bool quitNext = false;
+   vector<pairD> betwVar(M),withVar(M),s2j(M),totAverage(M),av,var;
+   vector<pair<pairD,long> > rHat2(M);
+   // }}}
+   // Names: {{{
+   stringstream sstr;
+   #ifdef LOG_RHAT
+      sstr.str("");
+      sstr<<args.getS("outFilePrefix")<<".rhatLog";
+      string rhatLogFile = sstr.str();
+   #endif
+   #ifdef LOG_NEED
+      sstr.str("");
+      sstr<<args.getS("outFilePrefix")<<".effLog";
+      string effLogFile = sstr.str();
+   #endif
+   // }}}
+   // Init: {{{
+   DEBUG(message("Initialization:\n"));
+   samplesN=gPar.samplesN();
+   chainsN=gPar.chainsN();
+   samplesSave=(gPar.samplesSave()-1)/chainsN+1;
+
+   vector<Sampler*> samplers(chainsN);
+   if( ! args.flag("gibbs")){
+      for(j=0;j<chainsN;j++)
+         samplers[j] = new CollapsedSampler;
+   }else{
+      for(j=0;j<chainsN;j++)
+         samplers[j] = new GibbsSampler;
+   }
+
+   timer.start();
+   timer.start(1);
+   if(args.isSet("seed"))seed=args.getL("seed");
+   else seed = time(NULL);
+   if(args.verbose)message("seed: %ld\n",seed);
+   for(i=0;i<chainsN;i++){
+      // Init samplers
+      DEBUG(message("Sampler %ld init.\n",i);)
+      samplers[i]->noSave();
+      DEBUG(message("init\n");)
+      samplers[i]->init(M, samplesN, samplesSave, Nunmap, alignments, gPar.beta(), gPar.dir(), seed);
+      DEBUG(message("   seed: %ld\n",seed);)
+      // sampler is initialized with 'seed' and then sets 'seed' to new random seed for the next sampler
+   }
+   // parallel block: 
+   // make sure that all functions used are CONST and variables are being READ or private
+   // private: samplesHave (or subCounter)
+#ifdef BIOC_BUILD
+   long samplesDo, subCounter;
+   for(samplesHave=0;samplesHave<gPar.burnIn();samplesHave+=samplesDo){
+      samplesDo = min(gPar.burnIn() - samplesHave, samplesAtOnce);
+      #pragma omp parallel for private(subCounter)
+      for(i=0;i<chainsN;i++){
+         for(subCounter=0;subCounter<samplesDo; subCounter++){
+           samplers[i]->sample();
+         }
+      }
+      // Check for interrupt out of the parallel part.
+      R_INTERUPT;
+   }
+#else
+   #pragma omp parallel for private(samplesHave)
+   for(i=0;i<chainsN;i++){
+      DEBUG(message(" burn in\n");) 
+      for(samplesHave=0;samplesHave<gPar.burnIn();samplesHave++){
+         samplers[i]->sample();
+      }
+   }
+#endif
+   totalSamples = gPar.burnIn();
+   message("Burn in: %ld DONE. ",gPar.burnIn());
+   DEBUG(message(" reseting samplers after BurnIn\n"));
+   for(i=0;i<chainsN;i++){
+      samplers[i]->resetSampler(samplesN);
+   }
+   timer.split(0,'m');
+   //}}}
+   // Main sampling loop:
+   while(1){
+      timer.start();
+      // Sample: {{{
+      // parallel block:
+      // make sure that all functions used are CONST and variables are being READ or private
+      // private: samplesHave (or subCounter)
+#ifdef BIOC_BUILD
+      for(samplesHave=0;samplesHave<samplesN;samplesHave+=samplesDo){
+         samplesDo = min(samplesN - samplesHave, samplesAtOnce);
+         #pragma omp parallel for private(subCounter)
+         for(i=0;i<chainsN;i++){
+            for(subCounter=0;subCounter<samplesDo; subCounter++){
+               samplers[i]->sample();
+               samplers[i]->update();
+            }
+         }
+         // Check for interrupt out of the parallel part.
+         R_INTERUPT;
+      }
+#else
+      #pragma omp parallel for private(samplesHave)
+      for(i=0;i<chainsN;i++){
+         for(samplesHave = 0;samplesHave<samplesN;samplesHave++){
+            samplers[i]->sample();
+            samplers[i]->update();
+         }
+      }
+#endif
+      totalSamples += samplesN;
+      message("\nSampling DONE. ");
+      timer.split(0,'m');
+      //}}}
+      // Check for change of parameters: {{{
+      gPar.readParameters();
+      // }}}
+      // Compute convergence statistics {{{
+      totAverage.assign(M,pairD(0,0));
+      betwVar.assign(M,pairD(0,0));
+      withVar.assign(M,pairD(0,0));
+      // Norms for sums (used for variance and mean), should be same for all
+      // samplers and all transcripts.
+      sumNorms = samplers[0]->getSumNorms();
+      samplesHave = (long)sumNorms.FF;
+      for(i=0;i<M;i++){
+         for(j=0;j<chainsN;j++){
+            tmpA = samplers[j]->getAverage(i);
+            tmpV = samplers[j]->getWithinVariance(i);
+            totAverage[i].FF += tmpA.FF;
+            totAverage[i].SS += tmpA.SS;
+            withVar[i].FF += tmpV.FF;
+            withVar[i].SS += tmpV.SS;
+         }
+         totAverage[i].FF /= chainsN;
+         totAverage[i].SS /= chainsN;
+         withVar[i].FF /= chainsN;
+         withVar[i].SS /= chainsN;
+         for(j=0;j<chainsN;j++){
+            tmpA = samplers[j]->getAverage(i);
+            betwVar[i].FF += (totAverage[i].FF - tmpA.FF)*(totAverage[i].FF - tmpA.FF);
+            betwVar[i].SS += (totAverage[i].SS - tmpA.SS)*(totAverage[i].SS - tmpA.SS);
+         }
+         betwVar[i].FF /= (chainsN-1.0);
+         betwVar[i].SS /= (chainsN-1.0);
+      }
+      for(i=0;i<M;i++){
+         // betwVar[i] *= samplesHave / (chainsN - 1.0);
+         rHat2[i].SS=i;
+         if(withVar[i].FF == 0 ){
+            rHat2[i].FF.FF = 0;
+            rHat2[i].FF.SS = 0;
+         } else {
+            // First 'column' is Rhat of logit(theta).
+            rHat2[i].FF.FF = (sumNorms.SS - 1.0) / sumNorms.SS + betwVar[i].SS / withVar[i].SS ;
+            rHat2[i].FF.SS = (sumNorms.FF - 1.0) / sumNorms.FF + betwVar[i].FF / withVar[i].FF ;
+               //betwVar[i] / ( samplesHave * withVar[i] );
+         }
+      }
+      sort(rHat2.rbegin(),rHat2.rend());
+      message("rHat (for %ld samples) \n",samplesN);
+      rMean.FF=0;
+      rMean.SS=0;
+      message("    rHat   (rH theta|    tid | mean theta)\n");
+      for(i=0;(i<10) && (i<M);i++){
+         rH1 = sqrt(rHat2[i].FF.FF);
+         rH2 = sqrt(rHat2[i].FF.SS);
+         rMean.FF+=rH1;
+         rMean.SS+=rH2;
+//         message("   %lf (%lf | %ld | %lf|%lf|%lf)",rHat2[i].FF.FF,rHat2[i].FF.SS,rHat2[i].SS,totAverage[rHat2[i].SS].FF,withVar[rHat2[i].SS].FF,betwVar[rHat2[i].SS].FF/samplesHave);
+         if((i<3) || args.verbose){
+            message("   %7.4lf (%7.4lf | %6ld | %8.5lf)",rH1,rH2,rHat2[i].SS-1,totAverage[rHat2[i].SS].FF);
+            message("\n");
+         }
+//                  message("   %lf",sqrt(rHat2[i].FF));
+      }                  
+      rMean.FF /= 10.0;
+      rMean.SS /= 10.0;
+      message("  Mean rHat of worst 10 transcripts: %lf\n",rMean.FF);
+      if(args.flag("scaleReduction"))message("   (target: %.3lf)\n",gPar.targetScaleReduction());
+      message("  Mean C0: (");
+      for(j=0;j<chainsN;j++)message("%ld ",samplers[j]->getAverageC0());
+      message("). Nunmap: %ld\n",Nunmap);
+      if(args.flag("gibbs"))message("  Mean thetaAct (noise parameter)\n   %lf\n",totAverage[0].FF);
+      messageF("\n");
+      //}}}
+      // Log rHat if necessary. {{{
+      #ifdef LOG_RHAT
+         ofstream rhatLog(rhatLogFile.c_str(), ofstream::app);
+         rhatLog<<totalSamples<<" "<<(long)timer.getTime(1);
+         for(i=1;i<M;i++){
+            rhatLog<<" "<<sqrt(rHat2[i].FF.FF);
+         }
+         rhatLog<<endl;
+         rhatLog.close();
+      #endif
+      // }}}
+      // Increase sample size and start over: {{{
+      if(quitNext){// Sampling iterations end {{{
+         if(sqrt(rHat2[0].FF.FF) > gPar.targetScaleReduction()){
+            message("WARNING: Following transcripts failed to converge entirely\n   (however the estimates might still be usable):\n");
+            long countUncoverged=0;
+            sstr.str("");
+            sstr<<"# unconverged_transcripts: ";
+            for(i=0;(i<M) && (sqrt(rHat2[i].FF.FF) > gPar.targetScaleReduction());i++){
+               sstr<<rHat2[i].SS<<" ("<<sqrt(rHat2[i].FF.FF)<<") ";
+               countUncoverged++;
+               if(args.verbose)message("   %s( %ld , %lf )\n",(trInfo.trName(rHat2[i].SS-1)).c_str(),rHat2[i].SS-1,sqrt(rHat2[i].FF.FF));
+            }
+            sstr<<"\n";
+            failedMessage=sstr.str();
+            if(!args.verbose)message("   %ld transcripts (full list is in the output file)\n",countUncoverged);
+         }
+         // Close files and delete pointers.
+         for(j=0;j<chainsN;j++){
+            samplers[j]->noSave();
+            samplesFile[j].close();
+         }
+         delete[] samplesFile;
+         break;
+      }//}}}
+      if(! (args.flag("scaleReduction") || args.flag("MCMC_samplesDOmax"))){
+         vector<double> needS(M,0);
+         for(i=1;i<M;i++){
+            // between variance was not multiplied by samplesHave===n
+            // there is no chainsN in the denominator because samplesSave was already divided by chainsN
+            // Use LOGIT(theta):
+            needS[i] = samplesSave * sumNorms.SS/
+                     ((sumNorms.SS-1.0)/sumNorms.SS*withVar[i].SS/betwVar[i].SS+1.0);
+            //needS[i] = samplesSave * samplesHave/
+            //         ((samplesHave-1.0)/samplesHave*withVar[i].FF/betwVar[i].FF+1.0);
+         } 
+         // log the number of effective samples, only when testing... //{{{
+         #ifdef LOG_NEED
+            ofstream effLog(effLogFile.c_str());
+            for(i=1;i<M;i++){
+               effLog<<needS[rHat2[i].SS]<<" "<<sqrt(rHat2[i].FF.FF)<<" "<<samplesHave*betwVar[rHat2[i].SS].FF<<" "<<withVar[rHat2[i].SS].FF<<" "<<rHat2[i].SS<<endl;
+            }
+            effLog.close();
+         #endif
+         //}}}
+         sort(needS.begin(),needS.end());
+         i = (long)(M*0.95)+1; // make at least 95% transcripts converged 
+         /* samplesN -> now it will be samples needed PER chain in order to
+          * generate samplesSave*chainsN effective samples.
+          */
+         samplesN = max((long)needS[i],samplesSave);
+         quitNext = true;
+      }else{
+            // Prepare for producing samples if Rhat^2<target scale reduction
+            // OR reached samplesNmax
+            // OR produced too many samples (>500 000)
+         if((totalSamples*chainsN < 5000000) && (rMean.FF > gPar.targetScaleReduction())){
+            samplesN *= 2;
+         }else{
+            quitNext = true;
+         }
+         if((samplesN >= gPar.samplesNmax()) || args.flag("MCMC_samplesDOmax")){
+            samplesN=gPar.samplesNmax();
+            quitNext = true;
+         }
+      }
+      // if next iteration is the last one, prepare the files and make samples write samples
+      if(quitNext){ 
+         messageF("Producing %ld final samples from each chain.\n",samplesN);
+         // if samplesN<samplesSave, only samplesN samples will be saved
+         if(samplesN<samplesSave){
+            samplesSave = samplesN;
+         }
+         for(j=0;j<chainsN;j++){
+            sstr.str("");
+            sstr<<args.getS("outFilePrefix")<<"."<<args.getS("outputType")<<"S-"<<j;
+            samplesFileNames.push_back(sstr.str());
+            samplesFile[j].open(samplesFileNames[j].c_str());
+            if(! samplesFile[j].is_open()){
+               error("Main: Unable to open output file '%s'.\n",(sstr.str()).c_str());
+            }else{
+               samplesFile[j]<<"#\n# M "<<M-1<<"\n# N "<<samplesSave<<endl;
+               samplers[j]->saveSamples(&samplesFile[j],trInfo.getShiftedLengths(true),args.getS("outputType"));
+            }
+         }
+      }
+      for(j=0;j<chainsN;j++){
+         samplers[j]->resetSampler(samplesN);
+      }
+      samplesHave=0;
+      //}}}
+   }
+   // Write means: {{{
+   meansFile.open((args.getS("outFilePrefix")+".thetaMeans").c_str());
+   if(meansFile.is_open()){
+      meansFile<<"# T => Mrows \n# M "<<M-1<<endl;
+      meansFile<<"# file containing the mean value of theta - relative abundace of fragments and counts\n"
+                 "# (overall mean, overall counts, mean of saved samples, and mean from every chain are reported)\n"
+                 "# columns:\n"
+                 "# <transcriptID> <meanThetaOverall> <meanReadCountOverall> <meanThetaSaved> <varThetaOverall>";
+      for(j=0;j<chainsN;j++)meansFile<<" <chain"<<j+1<<"mean>";
+      meansFile<<endl;
+      meansFile<<scientific;
+      meansFile.precision(9);
+      double sumSaved, thetaSqSum, thetaSum, sumNorm, tSS, tS, sN, thetaVar;
+      for(i=0;i<M;i++){
+         sumSaved=thetaSqSum=thetaSum=sumNorm=0;
+         for(j=0;j<chainsN;j++){
+            sumSaved+=samplers[j]->getAverage(i).SS;
+            samplers[j]->getThetaSums(i, &tSS, &tS, &sN);
+            thetaSqSum += tSS;
+            thetaSum += tS;
+            sumNorm += sN;
+         }
+         if(i==0){
+            meansFile<<"#thetaAct:";
+         }else{
+            meansFile<<i;
+         }
+         thetaVar = thetaSqSum / (sumNorm - 1.0) -
+                    thetaSum / (sumNorm - 1.0) * thetaSum / sumNorm;
+         meansFile<<" "<<thetaSum/sumNorm<<" "<<(long)floor(thetaSum/sumNorm*alignments->getNreads()+0.5)<<" "<<sumSaved/chainsN<<" "<<thetaVar;
+         for(j=0;j<chainsN;j++)
+            meansFile<<" "<<samplers[j]->getAverage(i).FF;
+         meansFile<<endl;
+      }
+      meansFile.close();
+   }else{
+      warning("Main: Unable to write thetaMeans into: %s\n",(args.getS("outFilePrefix")+".thetaMeans").c_str());
+   }
+   //}}}
+   // Write thetaAct: {{{
+   if(args.isSet("thetaActFileName")){
+      ofstream actFile(args.getS("thetaActFileName").c_str());
+      if(actFile.is_open()){
+         actFile<<"# samples of thetaAct parameter (only generated when using gibbs sampling)\n";
+         actFile<<"# N "<<chainsN*samplesSave<<endl;
+         for(j=0;j<chainsN;j++){
+            for(i=0;i<(long)samplers[j]->getThetaActLog().size();i++)
+               actFile<<samplers[j]->getThetaActLog()[i]<<" ";
+         }
+         actFile<<endl;
+         actFile.close();
+      }else{
+         warning("Main: Unable to write thetaAct log: %s.\n",(args.getS("thetaActFileName")).c_str());
+      }
+   }
+   // }}}
+   // Free memory: {{{
+   for(j=0;j<chainsN;j++){
+      delete samplers[j];
+   }
+//   delete [] samplers;
+   //}}}
+   message("Total samples: %ld\n",totalSamples*chainsN);
+}//}}}
+
+// Entry point of the estimateExpression tool.
+// Parses command-line options, reads transcript information and the alignment
+// probability (.prob) file, runs the MCMC sampler (MCMC()) and finally
+// transposes & merges the per-chain sample files into one output file.
+// Returns 0 on success (or when argument parsing requests exit), 1 on error.
+extern "C" int estimateExpression(int *argc, char* argv[]) {//{{{
+clearDataEE();
+string programDescription =
+"Estimates expression given precomputed probabilities of (observed) reads' alignments.\n\
+   Uses MCMC sampling algorithm to produce relative abundance or RPKM.\n";
+   // Set options {{{
+   ArgumentParser args;
+   args.init(programDescription,"[prob file]",1);
+   args.addOptionS("o","outPrefix","outFilePrefix",1,"Prefix for the output files.");
+   args.addOptionS("O","outType","outputType",0,"Output type (theta, RPKM, counts, tau).","theta");
+   args.addOptionB("G","gibbs","gibbs",0,"Use Gibbs sampling instead of collapsed Gibbs sampling.");
+   args.addOptionS("p","parFile","parFileName",0,"File containing parameters for the sampler, which can be otherwise specified by --MCMC* options. As the file is checked after every MCMC iteration, the parameters can be adjusted while running.");
+   args.addOptionS("t","trInfoFile","trInfoFileName",0,"File containing transcript information. (Necessary for RPKM)");
+   args.addOptionL("P","procN","procN",0,"Limit the maximum number of threads to be used. (Default is the number of MCMC chains.)");
+   args.addOptionS("","thetaActFile","thetaActFileName",0,"File for logging noise parameter theta^{act}.");
+   args.addOptionL("","MCMC_burnIn","MCMC_burnIn",0,"Length of sampler's burn in period.",1000);
+   args.addOptionL("","MCMC_samplesN","MCMC_samplesN",0,"Initial number of samples produced. Doubles after every iteration.",1000);
+   args.addOptionL("","MCMC_samplesSave","MCMC_samplesSave",0,"Number of samples recorded in total.",1000);
+   args.addOptionL("","MCMC_samplesNmax","MCMC_samplesNmax",0,"Maximum number of samples produced in one iteration. After producing samplesNmax samples sampler finishes.",50000);
+   args.addOptionB("","MCMC_samplesDOmax","MCMC_samplesDOmax",0,"Produce maximum number of samples (samplesNmax) in second iteration and quit.");
+   args.addOptionL("","MCMC_chainsN","MCMC_chainsN",0,"Number of parallel chains used. At least two chains will be used.",4);
+   args.addOptionD("","MCMC_scaleReduction","MCMC_scaleReduction",0,"Target scale reduction, sampler finishes after this value is met.",1.2);
+   args.addOptionD("","MCMC_dirAlpha","MCMC_dirAlpha",0,"Alpha parameter for the Dirichlet distribution.",1.0);
+   args.addOptionB("","scaleReduction","scaleReduction",0,"Use scale reduction as stopping criterion, instead of computing effective sample size.");
+   args.addOptionL("s","seed","seed",0,"Random initialization seed.");
+   if(!args.parse(*argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // }}}
+   MyTimer timer;
+   gibbsParameters gPar;
+   TagAlignments *alignments=NULL;
+//{{{ Initialization:
+
+   // Command-line --MCMC* options first; the parameter file (re-read after
+   // every MCMC iteration) can override them while the sampler is running.
+   gPar.setParameters(args);
+   if(args.isSet("parFileName")){
+      gPar.setParameters(args.getS("parFileName"));
+   }
+   args.updateS("outputType", ns_expression::getOutputType(args));
+   if(args.verbose)gPar.getAllParameters();
+#ifdef SUPPORT_OPENMP
+   // One thread per chain unless the user limits it with --procN.
+   if(args.isSet("procN"))
+      omp_set_num_threads(args.getL("procN"));
+   else
+      omp_set_num_threads(gPar.chainsN());
+#endif
+
+
+   //}}}
+   // {{{ Read transcriptInfo and .prob file 
+   // Transcript info is mandatory only for RPKM output (needs lengths).
+   if((!args.isSet("trInfoFileName"))||(!trInfo.readInfo(args.getS("trInfoFileName")))){
+      if(args.getS("outputType") == "rpkm"){
+         error("Main: Missing transcript info file. The file is necessary for producing RPKM.\n");
+         return 1;
+      }
+   }else{
+      // +1 accounts for the noise transcript (index 0).
+      M = trInfo.getM()+1;
+   }
+   alignments = readData(args);
+   if(! alignments){
+      error("Main: Reading alignments failed.\n");
+      return 1;
+   }
+   if(M<=0){
+      error("Main: Invalid number of transcripts in .prob file.\n");
+      return 1;
+   }
+   // }}}
+
+   if(args.verbose)timer.split();
+   if(args.verbose)messageF("Starting the sampler.\n");
+   MCMC(alignments,gPar,args);
+   // {{{ Transpose and merge sample file 
+   // Per-chain sample files are transposed (transcripts become rows) and
+   // merged; the temporary per-chain files are removed afterwards.
+   if(transposeFiles(samplesFileNames,args.getS("outFilePrefix")+"."+args.getS("outputType"),args.verbose,failedMessage)){
+      if(args.verbose)message("Sample files transposed. Deleting.\n");
+      for(long i=0;i<(long)samplesFileNames.size();i++){
+         remove(samplesFileNames[i].c_str());
+      }
+   }else{
+      message("Transposing files failed. Please check the files and try using transposeLargeFile to transpose & merge the files into single file.\n");
+   }
+   //}}}
+   delete alignments;
+   message("DONE. ");
+   timer.split(0,'m');
+   return 0;
+}//}}}
+
+#ifndef BIOC_BUILD
+// Standalone (non-BioConductor) build entry point: forwards to estimateExpression.
+int main(int argc, char* argv[]) {
+   return estimateExpression(&argc,argv);
+}
+#endif
diff --git a/estimateHyperPar.cpp b/estimateHyperPar.cpp
new file mode 100644
index 0000000..65a936a
--- /dev/null
+++ b/estimateHyperPar.cpp
@@ -0,0 +1,369 @@
+/*
+ * Hyperparameter model in estimate[*]HyperPar.cpp always depends on the model used in 
+ *  relevant estimate[*]DE.cpp
+ */
+// DECLARATIONS: {{{
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <sstream>
+#include "boost/random/mersenne_twister.hpp"
+#include "boost/random/normal_distribution.hpp"
+#include "boost/random/uniform_01.hpp"
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "lowess.h"
+#include "MyTimer.h"
+#include "misc.h"
+#include "PosteriorSamples.h"
+#include "TranscriptExpression.h"
+
+#include "common.h"
+
+using ns_params::paramT;
+
+//}}}
+// Defaults: {{{
+// Base std. deviation of the random-walk proposal for alpha (scaled by proposalMultiplier).
+#define ALPHA_PROP 0.1
+// Base std. deviation of the random-walk proposal for beta (scaled by proposalMultiplier).
+#define BETA_PROP 0.08
+// Minimum / maximum number of transcripts per expression group.
+#define subM_MIN 10
+#define subM_MAX 5000
+// Default number of hyperparameter samples generated per group (--samplesNumber).
+#define SAMPLES_N 2
+// Maximum proposal-tuning iterations before a group sample is abandoned ("BREAKED").
+#define MAX_ITER 1000
+// Maximum number of extra re-sampling attempts per group (also extra MCMC samples read per transcript).
+#define MAX_RETRIES 10
+// Upper bound on alpha/beta; exceeding it aborts the current sample ("OVERFLOW").
+#define MAX_PARAM 5000
+//}}}
+
+// Entry point of the estimateHyperPar tool.
+// Streams transcripts (sorted by mean expression) in groups of similar
+// expression, and for each group draws (alpha, beta) hyperparameter samples
+// with an adaptive random-walk Metropolis-Hastings sampler; the samples are
+// then lowess-smoothed along the expression axis and written to the output
+// file. With --smoothOnly, previously sampled parameters are only smoothed.
+// Returns 0 on success (or handled early exit), 1 on input errors.
+extern "C" int estimateHyperPar(int *argc,char* argv[]){
+string programDescription =
+"Estimate expression dependent hyperparameters from the dataset.\n\
+   [sample Files] should contain transposed MCMC samples from replicates.\n\
+   To distinguish conditions use C between them e.g.:\n\
+      samplesC1-R1.rpkm samplesC1-R2.rpkm C samplesC2-R1.rpkm samplesC2-R2.rpkm";
+   // Intro: {{{
+   // Set options {{{
+   ArgumentParser args(programDescription,"[sampleFiles]",1);
+   args.addOptionB("V","veryVerbose","veryVerbose",0,"More verbose output.");
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   args.addOptionS("p","paramsAllFile","paramsAllFileName",0,"Name of the file to which to store all parameter values generated prior to lowess smoothing.");
+   args.addOptionS("","meanFile","meanFileName",0,"Name of the file containing joint mean and variance.");
+   args.addOptionL("g","groupsNumber","groupsN",0,"Number of groups of transcript of similar size.",200);
+   args.addOptionL("s","samplesNumber","samplesN",0,"Number of samples generated for each group.",SAMPLES_N);
+   args.addOptionD("l","lambda0","lambda0",0,"Precision scaling parameter lambda0.",2.0);
+   args.addOptionD("","exThreshold","exT",0,"Threshold of lowest expression for which the estimation is done.",-5);
+   args.addOptionB("S","smoothOnly","smoothOnly",0,"Input file contains previously sampled hyperparameters which should smoothed only.");
+   args.addOptionD("","lowess-f","lowess-f",0,"Parameter F for lowess smoothing specifying amount of smoothing.",0.2);
+   args.addOptionL("","lowess-steps","lowess-steps",0,"Parameter Nsteps for lowess smoothing specifying number of iterations.",5);
+   args.addOptionB("","noforce","noforce",0,"Do not force smoothing of the parameters.",false);
+   args.addOptionS("","norm","normalization",0,"Normalization constants for each input file provided as comma separated list of doubles (e.g. 1.0017,1.0,0.9999 ).");
+   args.addOptionL("","seed","seed",0,"Random initialization seed.");
+   if(!args.parse(*argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // }}}
+
+   MyTimer timer;
+   timer.start(1);
+   long i,M=0,N,RTN,C;
+   bool storeAll=args.isSet("paramsAllFileName");
+   vector<paramT> params;
+   paramT param;
+   TranscriptExpression trExp;
+   ofstream outF;
+
+   if(! args.flag("smoothOnly")){
+      if(! args.isSet("meanFileName")){
+         error("Main: Please provide mean file name (--meanFile).\n");
+         return 1;
+      }
+      trExp.readExpression(args.getS("meanFileName"), MEAN_VARIANCE);
+      // Force user to use logged mean and samples.
+      if(!trExp.isLogged()){
+         error("Main: Please compute the expression mean from logged samples (getVariance --log ...).\n");
+         return 1;
+      }
+      M = trExp.getM();
+      if(args.verbose)message("Transcripts in expression file: %ld\n",M);
+      // Sort transcripts by expression so they can be streamed in groups
+      // of decreasing expression below.
+      trExp.doSort(true);
+   } 
+   
+   if(!ns_misc::openOutput(args, &outF)) return 1;
+   ///}}}
+
+   if(args.flag("smoothOnly")){ 
+      // Reading previously sampled parameters. (header is copie into outF)
+      readParams(args.args()[0], &params, &outF);
+   }else{ 
+      // Sampling parameters based on data
+      // Read conditions {{{   
+      Conditions cond;
+      if(!ns_misc::readConditions(args, &C, &M, &N, &cond)) return 1;
+      RTN = cond.getRN();
+      if(args.verbose)message("Number of all replicates: %ld\n",RTN);
+
+      // Prepare file for storing all sampled parameters.
+      ofstream paramsF;
+      if(storeAll){
+         if(!ns_misc::openOutput(args.getS("paramsAllFileName"), &paramsF)) return 1;
+         paramsF<<"# lambda0 "<<args.getD("lambda0")<<endl;
+      }
+      // }}}
+      // Declarations {{{
+      // mu0[m]      - prior mean (mean log expression) of transcript m in the group
+      // tr[m][r]    - MCMC expression samples of transcript m in replicate r
+      // bAdd[m][c]  - data-dependent term added to beta for condition c
+      //               (used in the acceptance ratio below)
+      vector<long double> mu0(subM_MAX,0);
+      vector<vector<vector<double> > > tr(subM_MAX,vector<vector<double> >(RTN));
+      vector<vector<long double> > bAdd(subM_MAX,vector<long double> (C,0));
+      boost::random::mt11213b rng_mt(ns_misc::getSeed(args));
+      boost::random::uniform_01<long double> uniformDistribution;
+      boost::random::normal_distribution<long double> normalDistributionA,normalDistributionB;
+      typedef boost::random::normal_distribution<long double>::param_type nDP;
+      
+      long double alpha,beta,alphaP,betaP,prob,probAll,probC,mean,old_mult,proposalMultiplier,acceptR,sum,sumS,lambda0,exDelta,exLast;
+      long samp,samplesN,samplesREDO,maxIter,r,c,m,curM,Rc,subM;
+      bool breaked=false,good=false;
+      //}}}
+      // Initial values {{{ 
+      // Random initialization of the chain; re-drawn after a failed sample.
+      alpha=uniformDistribution(rng_mt)*10.0;
+      beta=uniformDistribution(rng_mt)*5.0;
+      old_mult=0;
+      proposalMultiplier=2.0;
+      prob = 0;
+      lambda0 = args.getD("lambda0");
+      samplesN = args.getL("samplesN");
+      curM=0;
+      // exDelta - expression span of one group (total range / groupsN).
+      exDelta = (trExp.exp(0)-trExp.exp(M-1))/args.getL("groupsN");
+      exLast = trExp.exp(0);
+      if(args.verbose)message("Expression step: %Lg\n",exDelta);
+      // }}}
+      timer.split();
+      if(args.verbose)message("Running sampler.\n");
+      while(curM<M){
+         // Reading next group of transcripts {{{
+         // Collect between subM_MIN and subM_MAX transcripts spanning at most
+         // exDelta in expression; stop entirely below the --exThreshold cutoff.
+         mean=0;
+         m = 0;
+         while((curM<M)&&(m<subM_MAX)){
+            if(trExp.exp(curM)<args.getD("exT")){
+               if(args.verbose)message("skipping expression: %lg\n",trExp.exp(curM));
+               break;
+            }
+            for(r=0;r<RTN;r++){
+               // Read samplesN+MAX_RETRIES samples so retries have fresh draws.
+               good = cond.getTranscript(r, trExp.id(curM), tr[m][r],samplesN+MAX_RETRIES);
+               if(!good)break;
+               // If sampels were not logged, log them now.
+               if(!cond.logged())
+                  for(samp=0;samp<samplesN+MAX_RETRIES;samp++){
+                     tr[m][r][samp] = (tr[m][r][samp] == 0)? ns_misc::LOG_ZERO:log(tr[m][r][samp]);
+                  }
+            }
+            if(good){
+               mu0[m]=trExp.exp(curM);
+               mean+=mu0[m];
+               m++;
+            }
+            curM++;
+            if(args.flag("veryVerbose"))if(progressLog(curM,M,10,' '))timer.split(0,'m');
+            if((m>=subM_MIN)&&(exDelta<exLast-trExp.exp(curM-1)))break;
+         }
+         exLast = trExp.exp(curM-1);
+         if(m<subM_MIN)break;
+         subM = m;
+         mean/=subM;
+         if(args.flag("veryVerbose"))message("# mean: %Lg  subM: %ld\n",mean,subM);
+         if(storeAll)paramsF<<"# mean: "<<mean<<"  subM: "<<subM<<endl;
+         samplesREDO = 0;
+         //}}}
+         for(samp=0;samp<samplesN+samplesREDO;samp++){
+            // Computing Badd_gc and initializing {{{
+            for(m=0;m<subM;m++){
+               i=0; // counter over all replicates;
+               for(c=0;c<C;c++){
+                  sum = 0;
+                  sumS = 0;
+                  Rc=cond.getRC(c);
+                  for(r=0;r<Rc;r++){
+                     sum += tr[m][i][samp];
+                     sumS += tr[m][i][samp]*tr[m][i][samp];
+                     i++;
+                  }
+                  bAdd[m][c]=0.5*(sumS + mu0[m]*mu0[m]*lambda0 - 
+                        (sum+mu0[m]*lambda0)*(sum+mu0[m]*lambda0)/(lambda0+Rc));
+               }
+            } 
+            acceptR=0;
+            old_mult=0;
+            proposalMultiplier=proposalMultiplier*2.0;
+            normalDistributionA.param(nDP(0,ALPHA_PROP*proposalMultiplier));
+            normalDistributionB.param(nDP(0,BETA_PROP*proposalMultiplier));
+            maxIter=0;
+            breaked = false;
+            R_INTERUPT;
+            //}}}
+            // Repeat 1000-sample batches, shrinking/growing the proposal scale
+            // until the acceptance ratio lands in [0.25, 0.5] with a stable scale.
+            while((acceptR<0.25)||(acceptR>0.5)||(old_mult!=proposalMultiplier)){
+               // Convergence control based on acceptance ratio. {{{
+               maxIter++;
+               if(maxIter>MAX_ITER){
+                  if(args.flag("veryVerbose"))
+                     message("(BREAKED acceptR %Lg mult %Lg)\n",acceptR,proposalMultiplier);
+                  if(storeAll)
+                     paramsF<<"#(BREAKED acceptR "<<acceptR<<" mult "<<proposalMultiplier<<")"<<endl;
+                  breaked=true;
+                  break;
+               }
+               if((alpha>MAX_PARAM)||(beta>MAX_PARAM)){
+                  if(args.flag("veryVerbose"))
+                     message("(OVERFLOW acceptR %Lg mult %Lg)\n",acceptR,proposalMultiplier);
+                  if(storeAll)
+                     paramsF<<"#(OVERFLOW acceptR "<<acceptR<<" mult "<<proposalMultiplier<<")"<<endl;
+                  breaked=true;
+                  break;
+               }
+               old_mult=proposalMultiplier;
+               if(acceptR<0.25)proposalMultiplier/=1.02;
+               if(acceptR>0.5)proposalMultiplier*=1.02;
+               if(old_mult!=proposalMultiplier){
+                  normalDistributionA.param(nDP(0,ALPHA_PROP*proposalMultiplier));
+                  normalDistributionB.param(nDP(0,BETA_PROP*proposalMultiplier));
+               }
+               //}}}
+               acceptR=0;
+               R_INTERUPT;
+               for(i=0;i<1000;i++){ // Sampling 1000 samples {{{
+                  // Random-walk proposal, reflected at zero to stay positive.
+                  alphaP = alpha + normalDistributionA(rng_mt);
+                  if(alphaP<0)alphaP = -alphaP;
+                  betaP= beta + normalDistributionB(rng_mt);
+                  if(betaP<0)betaP = -betaP;
+                  if((alphaP==0)||(betaP==0)){
+                     prob=0;
+                  }else{
+                     // Metropolis-Hastings acceptance probability; the gamma
+                     // terms are evaluated via lgamma and exponentiated per
+                     // condition, the remaining factors multiplied per transcript.
+                     prob = 1.0;
+                     probAll = pow(betaP,alphaP) / pow(beta,alpha);
+                     for(c=0;c<C;c++){
+                        probC = lgamma(alphaP + cond.getRC(c)/2.0)+
+                            lgamma(alpha) -
+                            lgamma(alpha + cond.getRC(c)/2.0) -
+                            lgamma(alphaP);
+                        probC = probAll * exp(probC);
+                        for(m=0;m<subM;m++){
+      //                  message(" (var_g %lg) (pow %lg %lg %lg) ",bAdd[g]/2.0,pow(beta+bAdd[g]/2, alpha),pow(betaP+bAdd[g]/2, alphaP),pow((beta+bAdd[g]/2)/(betaP+bAdd[g]/2),SUB_N/2));
+                           prob *= probC;
+                           prob *= pow(beta+bAdd[m][c], alpha) / 
+                                   pow(betaP+bAdd[m][c], alphaP); 
+                           prob *= pow( (beta+bAdd[m][c])/(betaP+bAdd[m][c]), (long double)(cond.getRC(c)/2.0));
+                        }
+                     }
+                     if((prob>1.0)||(uniformDistribution(rng_mt)< prob)){
+                        alpha=alphaP;
+                        beta=betaP;
+                        acceptR++;
+                     }
+                  }
+               } //}}}
+               acceptR/=i;
+            }
+            // Save generated parameters {{{
+            if(storeAll)
+               paramsF<<"#(acceptR "<<acceptR<<" mult "<<proposalMultiplier<<" iter "<<maxIter<<")"<<endl;
+            if(!breaked){
+               if(args.flag("veryVerbose")) message("%Lg  %Lg\n",alpha,beta);
+               if(storeAll) paramsF<<alpha<<" "<<beta<<" "<<mean<<endl;
+               param.expr=mean;
+               param.alpha=alpha;
+               param.beta=beta;
+               params.push_back(param);
+            }else{
+               // Failed sample: reset the proposal scale, re-draw the chain
+               // state and schedule one extra attempt (up to MAX_RETRIES).
+               if(args.flag("veryVerbose")) message("# %Lg %Lg %Lg\n",alpha,beta,mean);
+               if(storeAll) paramsF<<"# "<<alpha<<"  "<<beta<<endl;
+               proposalMultiplier=2;
+               normalDistributionA.param(nDP(0,ALPHA_PROP*proposalMultiplier));
+               normalDistributionB.param(nDP(0,BETA_PROP*proposalMultiplier));
+               alpha=uniformDistribution(rng_mt)*10.0;
+               beta=uniformDistribution(rng_mt)*5.0;
+               if(samplesREDO<MAX_RETRIES){
+                  samplesREDO++;
+               }
+            }
+            //}}}
+         }
+         if((args.verbose)&&(!args.flag("veryVerbose"))){
+            messageF(".");
+         }
+      }
+      cond.close();
+      if(storeAll)paramsF.close();
+      outF<<"# lambda0 "<<args.getD("lambda0")<<endl;
+      if(args.verbose)message("\nSampling done.\n");
+   }
+   // Lowess-smooth alpha and beta as functions of (sorted) mean expression.
+   sort(params.begin(),params.end());
+   long pAll=(long)params.size(), pDistinct;
+   if(args.verbose)message("Have %ld parameters to smooth.\n",pAll);
+   vector<double> exp(pAll),alp(pAll),bet(pAll),alpS,betS;
+   for(i=0;i<pAll;i++){
+      exp[i]=params[i].expr;
+      alp[i]=params[i].alpha;
+      bet[i]=params[i].beta;
+   }
+   double f = args.getD("lowess-f");
+   long iter = args.getL("lowess-steps"),iterAdd;
+   bool redoSmooth;
+   for(iterAdd=0;iterAdd<6;iterAdd++){ // Increase iteration if anything is <=0
+      redoSmooth = false;
+      lowess(exp,alp,f,iter+iterAdd,alpS);
+      for(i=0;i<pAll;i++)
+         if(alpS[i]<=0){
+            redoSmooth = true;
+            if(args.flag("veryVerbose"))message(" negative alpha: %lg exp: %lg\n",alpS[i],exp[i]);
+         }
+      if(!redoSmooth)break;
+      if(args.verbose)message("Re-Smoothing alpha.\n");
+   }
+   outF<<"# alphaSmooth f: "<<f<<" nSteps: "<<iter+iterAdd<<endl;
+   if(args.verbose)message("# alphaSmooth f: %lg nSteps: %ld\n",f,iter+iterAdd);
+   if((iterAdd==6)&&(args.flag("noforce"))){
+      error("Main: Unable to produce smooth alpha >0.\nTry adjusting the parameter lowess-f.\n");
+      outF.close();
+      remove(args.getS("outFileName").c_str());
+      return 0;
+   }
+   for(iterAdd=0;iterAdd<6;iterAdd++){ // Increase iteration if anything is <=0
+      redoSmooth = false;
+      lowess(exp,bet,f,iter+iterAdd,betS);
+      for(i=0;i<pAll;i++)
+         if(betS[i]<=0){
+            redoSmooth = true;
+            if(args.flag("veryVerbose"))message(" negative beta: %lg exp: %lg\n",betS[i],exp[i]);
+         }
+      if(!redoSmooth)break;
+      if(args.verbose)message("Re-Smoothing beta.\n");
+   }
+   outF<<"# betaSmooth f: "<<f<<" nSteps: "<<iter+iterAdd<<endl;
+   if(args.verbose)message("# betaSmooth f: %lg nSteps: %ld\n",f,iter+iterAdd);
+   if((iterAdd==6)&&(args.flag("noforce"))){
+      error("Main: Unable to produce smooth beta >0.\nTry adjusting the parameter lowess-f.\n");
+      outF.close();
+      remove(args.getS("outFileName").c_str());
+      return 0;
+   }
+   // Unless --noforce, drop any entries whose smoothed alpha/beta are still <=0.
+   if(!args.flag("noforce")){
+      for(i=0;i<pAll;i++)
+         while((i<pAll)&&((alpS[i]<=0)||(betS[i]<=0))){
+            message("Removing: %lg %lg %lg\n",alpS[i],betS[i],exp[i]);
+            alpS.erase(alpS.begin()+i); betS.erase(betS.begin()+i); exp.erase(exp.begin()+i);
+            pAll = alpS.size();
+         }
+   }
+   // Write one (alpha, beta, expression) row per distinct expression value.
+   pDistinct = 1;
+   for(i=1;i<pAll;i++)if(exp[i]!=exp[i-1])pDistinct++;
+   outF<<"# PN "<<pDistinct<<" hyperparameters"<<endl;
+   outF<<"# columns: alpha beta expression "<<endl;
+   outF<<alpS[0]<<" "<<betS[0]<<" "<<exp[0]<<endl;
+   for(i=1;i<pAll;i++)
+      if(exp[i]!=exp[i-1])outF<<alpS[i]<<" "<<betS[i]<<" "<<exp[i]<<endl;
+   outF.close();
+   if(args.verbose){message("DONE.\n");timer.stop(1,'m');}
+   return 0;
+}
+
+#ifndef BIOC_BUILD
+// Standalone (non-BioConductor) build entry point: forwards to estimateHyperPar.
+int main(int argc,char* argv[]){
+   return estimateHyperPar(&argc,argv);
+}
+#endif
diff --git a/estimateVBExpression.cpp b/estimateVBExpression.cpp
new file mode 100644
index 0000000..9c78b37
--- /dev/null
+++ b/estimateVBExpression.cpp
@@ -0,0 +1,238 @@
+#include "ArgumentParser.h"
+#include "FileHeader.h"
+#include "misc.h"
+#include "MyTimer.h"
+#include "SimpleSparse.h"
+#include "TagAlignments.h"
+#include "transposeFiles.h"
+#include "VariationalBayes.h"
+
+#include "common.h"
+
+SimpleSparse* readData(const ArgumentParser &args, long trM){//{{{
+/*
+ As parse(filename,maxreads=None) in python
+ Python difference:
+  - missing maxreads check 
+    (abort if more than maxreads reads were processed)
+*/
+   long i,j,num,tid;
+   double prb;
+   long Ntotal=0,Nmap=0, M=0;
+   string readId,strand,blank;
+   ifstream inFile;
+   MyTimer timer;
+   TagAlignments *alignments = new TagAlignments();
+
+   // Read alignment probabilities {{{
+   inFile.open(args.args()[0].c_str());
+   FileHeader fh(&inFile);
+   ns_fileHeader::AlignmentFileType format;
+   if((!fh.probHeader(&Nmap,&Ntotal,&M,&format)) || (Nmap ==0)){//{{{
+      error("Prob file header read failed.\n");
+      return NULL;
+   }//}}}
+   if(format == ns_fileHeader::OLD_FORMAT){
+      error("Please use new/log format of Prob file.");
+      return NULL;
+   }
+   message("N mapped: %ld\n",Nmap);
+   messageF("N total:  %ld\n",Ntotal);
+   if(args.verb())message("Reading alignments.\n");
+   alignments->init(Nmap,0,M);
+   long mod=10000;
+   long bad = 0;
+   timer.start();
+   for(i = 0; i < Nmap; i++) {
+      inFile>>readId>>num;
+      if(!inFile.good())break;
+     //    message("%s %ld\n",(readId).c_str(),num);
+      for(j = 0; j < num; j++) {
+         inFile>>tid>>prb;
+         if(inFile.fail()){
+            inFile.clear();
+            // ignore rest of line
+            j=num;
+            // this read goes to noise
+            tid=0;
+            // 10 means either 10 or exp(10), but should be still be large enough
+            prb=10;
+            bad++;
+         }
+         switch(format){
+            case ns_fileHeader::NEW_FORMAT:
+               alignments->pushAlignment(tid, prb);
+               break;
+            case ns_fileHeader::LOG_FORMAT:
+               alignments->pushAlignmentL(tid, prb);
+               break;
+            default:;
+         } 
+      }
+      // ignore rest of line
+      inFile.ignore(10000000,'\n');
+
+      alignments->pushRead();
+      
+      R_INTERUPT;
+      if(args.verb() && (i % mod == 0) && (i>0)){
+         message("  %ld ",i);
+         timer.split();
+         mod*=10;
+      }
+   }
+   if(bad>0)warning("Main: %ld reads' alignment information were corrupted.\n",bad);
+   inFile.close();
+   long Nhits,NreadsReal;
+   alignments->finalizeRead(&M, &NreadsReal, &Nhits);
+   // Increase M based on number of transcripts in trInfo file.
+   if(M<trM)M = trM;
+   //}}}
+   if(i<Nmap)message("Read only %ld reads.\n",NreadsReal);
+   message("All alignments: %ld\n",Nhits);
+   messageF("Isoforms: %ld\n",M);
+   Nmap = NreadsReal;
+
+   SimpleSparse *beta = new SimpleSparse(Nmap, M, Nhits);
+
+   for(i=0;i<=Nmap;i++)beta->rowStart[i]=alignments->getReadsI(i);
+   for(i=0;i<Nhits;i++){
+      beta->val[i]=alignments->getProb(i);
+      beta->col[i]=alignments->getTrId(i);
+   }
+
+   delete alignments;
+   return beta;
+}//}}}
+
extern "C" int estimateVBExpression(int *argc, char* argv[]) {//{{{
// Entry point: reads a .prob file, runs Variational Bayes optimization and
// writes the resulting Dirichlet parameters (<prefix>.m_alphas), optionally
// followed by samples drawn from the fitted distribution.
string programDescription =
"Estimates expression given precomputed probabilities of (observed) reads' alignments.\n\
   Uses Variational Bayes algorithm to produce parameters for distribution of relative abundances.\n";
   // Set options {{{
   ArgumentParser args;
   args.init(programDescription,"[prob file]",1);
   args.addOptionS("o","outPrefix","outFilePrefix",1,"Prefix for the output files.");
   args.addOptionS("O","outType","outputType",0,"Output type (theta, RPKM, counts) of the samples sampled from the distribution.","theta");
   args.addOptionS("t","trInfoFile","trInfoFileName",0,"File containing transcript information. (Necessary for RPKM samples)");
   args.addOptionL("P","procN","procN",0,"Limit the maximum number of threads to be used.",4);
   args.addOptionS("m","method","optMethod",0,"Optimization method (steepest, PR, FR, HS).","FR");
   args.addOptionL("s","seed","seed",0,"Random initialization seed.");
   args.addOptionL("","maxIter","maxIter",0,"Maximum number of iterations.",(long)1e4);
   args.addOptionD("","optLimit","limit",0,"Optimisation limit in terms of minimal gradient or change of bound.",1e-5); 
   args.addOptionL("","samples","samples",0,"Number of samples to be sampled from the distribution.");
   args.addOptionB("V","veryVerbose","veryVerbose",0,"More verbose output, better if output forwarded into file.");
   if(!args.parse(*argc,argv))return 0;
   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
   // Map the --method string to the optimizer enum; unknown or unset values
   // fall back to FR.
   OPT_TYPE optM;
   if(args.isSet("optMethod")){
      if((args.getLowerS("optMethod")=="steepest")||
         (args.getLowerS("optMethod")=="vbem"))optM = OPTT_STEEPEST;
      else if(args.getLowerS("optMethod")=="pr")optM = OPTT_PR;
      else if(args.getLowerS("optMethod")=="fr")optM = OPTT_FR;
      else if(args.getLowerS("optMethod")=="hs")optM = OPTT_HS;
      else optM = OPTT_FR;
   }else  optM = OPTT_FR;
   args.updateS("outputType", ns_expression::getOutputType(args, "theta"));
   if(args.getS("outputType") == "tau"){
      error("Main: 'tau' is not valid output type.\n");
      return 1;
   }
   // }}}
   MyTimer timer;
   timer.start(2);
   long M = 0; 
   SimpleSparse *beta;
   TranscriptInfo trInfo;

   // {{{ Read transcriptInfo and .prob file 
   // Transcript info is optional unless RPKM samples were requested.
   if((!args.isSet("trInfoFileName"))||(!trInfo.readInfo(args.getS("trInfoFileName")))){
      if(args.isSet("samples") && (args.getL("samples")>0) && (args.getS("outputType") == "rpkm")){
         error("Main: Missing transcript info file. The file is necessary for producing RPKM samples.\n");
         return 1;
      }
   }else{
      // +1 accounts for the 'noise' transcript.
      M = trInfo.getM()+1;
   }
   beta = readData(args,M);
   if(! beta){
      error("Main: Reading probabilities failed.\n");
      return 1;
   }
   M = beta->M;
   if(M<=0){
      error("Main: Invalid number of transcripts in .prob file.\n");
      return 1;
   }
   // }}}

   if(args.verbose)timer.split();

   if(args.verbose)message("Initializing VB.\n");

   VariationalBayes varB(beta,NULL,ns_misc::getSeed(args),args.getL("procN"));
   
   if(args.verbose)timer.split();
   if(args.verbose)message("Starting VB optimization.\n");
   
#ifdef LOG_CONV
   varB.setLog(args.getS("outFilePrefix")+".convLog",&timer);
#endif

   // Optimize:
   if(!args.verbose)varB.beQuiet();
   varB.optimize(args.flag("veryVerbose"),optM,args.getL("maxIter"),args.getD("limit"),args.getD("limit"));

   if(args.verbose){timer.split(0,'m');}
   // Write the fitted Dirichlet parameters; alphaSum-alpha[i] gives the
   // second parameter of each marginal Gamma distribution (see header below).
   double *alpha = varB.getAlphas();
   double alphaSum = 0 ;
   long i;
   for(i=0;i<M;i++)alphaSum+=alpha[i];
   ofstream outF;
   if(! ns_misc::openOutput((args.getS("outFilePrefix")+".m_alphas"), &outF)){
      return 1;
   }
   outF<<"# "<<args.args()[0]<<endl;
   outF<<"# M "<<M<<"\n"
         "# List includes also 'noise' transcript (first line)\n"
         "# <alpha> - parameter of Dirichlet distribution\n"
         "# <alpha> <beta> - parameters of the marginal Gamma distribution\n"
         "# columns: <mean theta> <alpha> <beta>"<<endl;
   outF<<scientific;
   outF.precision(9);
   for(i=0;i<M;i++){
      outF<<alpha[i]/alphaSum<<" "<<alpha[i]<<" "<<alphaSum-alpha[i]<<endl;
   }
   outF.close();
   // free memory
   // NOTE(review): delete[] assumes getAlphas() returns a new[]-allocated
   // array owned by the caller — confirm in VariationalBayes.
   delete beta;
   delete[] alpha;
   // Optionally draw samples into a temporary file, then transpose them into
   // the final <prefix>.VB<type> file.
   if(args.isSet("samples") && (args.getL("samples")>0)){
      string outTypeS = args.getS("outputType");
      string samplesFName = args.getS("outFilePrefix")+".VB" + outTypeS;
      string samplesTmpName = args.getS("outFilePrefix")+".VB"+outTypeS+"TMP"; 
      timer.start(0);
      if(args.verbose)messageF("Generating samples into temporary file %s. ",samplesTmpName.c_str());
      if(!ns_misc::openOutput(samplesTmpName, &outF)) return 1;
      // Samples are generated without the "noise transcript".
      outF<<"# M "<<M-1<<" N "<<args.getL("samples")<<endl;
      varB.generateSamples(args.getL("samples"), outTypeS, trInfo.getShiftedLengths(), &outF);
      outF.close();
      if(args.verbose)timer.split(0);
      if(transposeFiles(vector<string>(1, samplesTmpName), samplesFName, args.verbose, "")){
         if(args.verbose)message("Removing temporary file %s.\n", samplesTmpName.c_str());
         remove(samplesTmpName.c_str());
      }else {
         error("Main: Transposing samples failed.\n");
         return 1;
      }
   }
   if(args.verbose){message("DONE. "); timer.split(2,'m');}
   return 0;
}//}}}
+
#ifndef BIOC_BUILD
// Standalone command-line entry point; excluded from the Bioconductor (R
// package) build, where estimateVBExpression() is invoked directly instead.
int main(int argc, char* argv[]) {
   return estimateVBExpression(&argc,argv);
}
#endif
diff --git a/extractSamples.cpp b/extractSamples.cpp
new file mode 100644
index 0000000..4c05687
--- /dev/null
+++ b/extractSamples.cpp
@@ -0,0 +1,126 @@
+/*
+ *
+ * Extract samples of given transcripts.
+ *
+ *
+ */
+#include<iostream>
+#include<cstdlib>
+#include<algorithm>
+
+using namespace std;
+
+#include "PosteriorSamples.h"
+#include "ArgumentParser.h"
+#include "common.h"
+
+#define Sof(x) (long)x.size()
+
vector <long> tokenizeL(const string &input,const string &space = " "){//{{{
   // Splits 'input' on every occurrence of the delimiter 'space' and converts
   // each non-empty token with atoi (matching the original behaviour,
   // including silent 0 for non-numeric tokens). Empty tokens are skipped.
   vector <long> ret;
   string::size_type pos = 0;
   while(pos < input.size()){
      string::size_type f = input.find(space, pos);
      if(f == string::npos){
         // No further delimiter: the rest is the last token.
         ret.push_back(atoi(input.substr(pos).c_str()));
         break;
      }
      if(f > pos) ret.push_back(atoi(input.substr(pos, f - pos).c_str()));
      // FIX: advance past the entire delimiter. The original advanced by one
      // character only, which broke multi-character delimiters; it also
      // relied on the implementation-defined conversion of string::npos to
      // a negative long.
      pos = f + space.size();
   }
   return ret;
} //}}}
+
+int main(int argc,char* argv[]){
+   srand(time(NULL));
+   string programDescription=
+"Extracts MCMC samples of selected transcripts.\n\
+   [sampleFiles] should contain transposed MCMC samples.";   
+   // Set options {{{
+   ArgumentParser args(programDescription,"[sampleFiles]",1);
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   args.addOptionS("L","list","list",0,"Comma delimited list of ZERO-BASED transcript ids (i.e. lines) which should be extracted: 0,17,47,1024,4777");
+   args.addOptionL("r","random","randomN",0,"Choose random [randomN] transcripts.");
+   if(!args.parse(argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // }}}
+   long i,j,c,C,N,M=0,S;
+   vector<long> trList;
+   Conditions samples;
+
+   // Initialize samples reader
+   if( (!samples.init("NONE", args.args(), &C, &M, &N)) || (C<=0) || (M<=0) || (N<=0)){
+      cerr<<"ERROR: Main: Failed loading MCMC samples."<<endl;
+      return 1;
+   }
+   C=samples.getRN();
+   if(args.isSet("list")){
+      // Process transcripts list:
+      trList = tokenizeL(args.getS("list"),",");
+      sort(trList.begin(),trList.end());
+      // Erase invalid and duplicate IDs
+      for(i=0;i<Sof(trList);i++){
+         if((trList[i]<0)||(trList[i]>=M)||((i>0)&&(trList[i]==trList[i-1]))){
+            trList.erase(trList.begin()+i);
+            i--;
+         }
+      }   
+      S=Sof(trList);
+      if(S==0){
+         cerr<<"ERROR: Main: No valid transcript IDs supplied."<<endl;
+         return 1;
+      }
+   }else if(args.isSet("randomN")){
+      // Create list of [randomN] random transcripts
+      S = args.getL("randomN");
+      if((S<=0)||(S>M)){
+         cerr<<"ERROR: Main: Wrong number of transcripts ot output: "<<S<<"."<<endl;
+         return 1;
+      }
+      for(i=0;i<S;i++){
+         j = rand()%M;
+         while(find(trList.begin(),trList.end(),j)!=trList.end())
+            j = rand()%M;
+         trList.push_back(j);
+      }
+      sort(trList.begin(),trList.end());
+   }else{
+      cerr<<"ERROR: Main: Need to specify at least one of --list or --random."<<endl;
+      return 1;
+   }
+   if(args.verbose)cout<<"C: "<<C<<" samples: "<<N<<"\ntranscripts: "<<M<<"\nselected: "<<S<<endl;
+   
+   // Open output file and write header
+   ofstream outFile(args.getS("outFileName").c_str());
+   if(! outFile.is_open()){
+      cerr<<"ERROR: Main: File write failed!"<<endl;
+      return 1;
+   }
+   outFile<<"# Selected transcripts from: ";
+   for(i=0;i<C;i++)outFile<<args.args()[i]<<",";
+   outFile<<endl;
+   outFile<<"# transcripts(zero-based): "<<trList[0];
+   for(i=1;i<S;i++)outFile<<","<<trList[i];
+   outFile<<"\n# T (M rows,N cols)\n";
+   outFile<<"# C "<<C<<" (conditions)\n";
+   outFile<<"# M "<<S<<" (out of: "<<M<<")\n# N "<<N<<endl;
+   outFile.precision(9);
+   outFile<<scientific;
+
+   // Copy samples
+   vector<double> tr;
+   for(j=0;j<S;j++){
+      if(args.verbose)cout<<trList[j]<<" ";
+      cout.flush();
+      for(c=0;c<C;c++){
+         samples.getTranscript(c,trList[j], tr);
+         for(i=0;i<N;i++)outFile<<tr[i]<<" ";
+         outFile<<endl;
+      }
+   }
+   outFile.close();
+   if(args.verbose)cout<<"DONE"<<endl;
+   return 0;
+}
diff --git a/extractTranscriptInfo.py b/extractTranscriptInfo.py
new file mode 100755
index 0000000..879076c
--- /dev/null
+++ b/extractTranscriptInfo.py
@@ -0,0 +1,91 @@
#!/usr/bin/python
# Extracts per-transcript information (gene name, transcript name, sequence
# length) from a reference Fasta file and writes a transcript-info file:
# a "# M <count>" header followed by one "<gene> <transcript> <length>" line
# per sequence.
# Initialization {{{
import sys
from optparse import OptionParser
parser = OptionParser(usage="%prog [options] <inputFile> <outputFile>\n\n\
      This program extracts information about transcripts from reference Fasta file.\n\
      This is partially replaced by using SAM header, which however does not include information about transcript-gene grouping.\n\
      Current version of parseAlignment extracts this information from a reference sequence file (making this script obsolete).\
")
parser.add_option("-v", "--verbose", default=False, dest="verbose",  action="store_true", help="Verbose output")
parser.add_option("-t","--type",dest="type", type="string",help="Type of file to parse: ensembl, cuff, other");

(options, args) = parser.parse_args()
# Print str only when --verbose was given.
def verbose(str):
   if options.verbose:
      print str;

if len(args)<2: 
   sys.exit("Missing arguments");

try:
   inF = open(args[0],"r");
except:
   sys.exit("Unable to open input file: "+args[0]+" .");


try:
   outF = open(args[1],"w");
except:
   sys.exit("Unable to open output file: "+args[1]+" .");
#}}}

seqName="";
geneName="";
seqLen=0;
seqCount=0;

result = [];
li = 0;

# Decide how gene names are extracted from Fasta header lines:
#  "ens" - Ensembl style "gene:<name>" field,
#  "cuf" - Cufflinks style "gene=<name>" field,
#  "non" - no gene information; gene name is set to "none".
if options.type:
   if options.type=="ensembl": 
      itype = "ens";
      print "Expecting header line format:\n>[tr Name] .* gene:[gene Name] .*";
   elif options.type=="cuff":
      itype = "cuf";
      print "Expecting header line format:\n>[tr Name] .* gene=[gene Name] .*";
   else:
      itype = "non";
      print "Expecting header line format:\n>[tr Name] .*\n -> using \"none\" as gene names";
else:
   itype = "non";
   print "Expecting header line format:\n>[tr Name] .*\n -> using \"none\" as gene names";

for line in inF:
   li+=1;
   if line[0] == '>':
      # New header line: flush the previous sequence's record first.
      if seqName!="":
         result.append([geneName,seqName,str(seqLen)]);
      seqLen=0;
      seqCount+=1;
      # Split line after >
      # NOTE(review): a bare '>' header line would make lSplit empty and
      # raise IndexError on the next line — confirm inputs never do this.
      lSplit = line[1:].split()
      seqName = lSplit[0];
      if seqName == "":
         seqName = "unknown-tr"+str(seqCount);
         print "Warning: no name on line ",li,". Using '",seqName,"'.";
      if itype == "non":
         geneName = "none";
      else:
         geneName = ""
         # Look for the gene field; [5:] strips the 5-character prefix
         # "gene:"/"gene=".
         for it in lSplit:
            if (itype=="ens" and "gene:" in it) or (itype=="cuf" and "gene=" in it) :
               geneName=it[5:];
         # Fall back to the transcript name when no gene field was found.
         if geneName == "":
            geneName = seqName;
   else:
      # Sequence line: count characters excluding the trailing newline.
      seqLen+=len(line)-1;
# Flush the record of the last sequence in the file.
if seqName!="":
   result.append([geneName,seqName,str(seqLen)]);

inF.close();

verbose(str(seqCount)+" sequences processed.");

outF.write("# M "+str(seqCount)+"\n");
for it in result:
   outF.write(it[0]+" "+it[1]+" "+it[2]+"\n");

outF.close();
+
diff --git a/getCounts.py b/getCounts.py
new file mode 100755
index 0000000..44e514b
--- /dev/null
+++ b/getCounts.py
@@ -0,0 +1,78 @@
+#!/usr/bin/python
+# Initialization {{{
+import sys
+import numpy as np
+#import os, time # needed for this:
+#time_str = time.strftime("%b %e %Y %H:%M:%S", time.gmtime(os.lstat(sys.argv[0]).st_mtime));
+#print "###",os.path.basename(sys.argv[0]),"build:",time_str;
+
+from optparse import OptionParser
+parser = OptionParser(usage="%prog [options] [<inputFile.thetaMeans>]+\n\n\
+      This program reads supplied .thetaMeans files and using either information from .prob files or Nmap option generates read counts for each input file provided.")
+parser.add_option("-o", "--outFile", dest="out", help="Output file", type="string")
+parser.add_option("-v", "--verbose", default=False, dest="verbose",  action="store_true", help="Verbose output")
+parser.add_option("-p", "--probDir", dest="probDir", help="Directory with .prob files. The program will look in here for files with same name except fot extension .prob in order to find out total-aligned-read counts for each experiment.", type="string")
+parser.add_option("-n", "--Nmap", dest="Nmap", help = "Comma separated list of total aligned-read-counts for each experiment.",type="string");
+def verbose(str):
+   if options.verbose:
+      print str;
+(options, args) = parser.parse_args()
+
+if len(args)==0:
+   sys.exit("Please supply .thetaMeans filenames as arguments.");
+if not options.out:
+   sys.exit("Please supply output file");
+if (not options.probDir) and (not options.Nmap):
+   sys.exit("Please use either --Nmap or --probDir.");
+#}}}
+
+if options.Nmap:
+   try:
+      N = [ float(it) for it in options.Nmap.split(",")]
+      if len(N) != len(args):
+         raise;
+   except:
+      sys.exit("Unable to turn '"+options.Nmap+"' into "+str(len(args))+" numbers.");
+else:
+   N = []
+   for arg in args:
+      fn = arg.split("/")[-1];
+      if fn[-11:] == '.thetaMeans':
+         fn = options.probDir +"/"+fn[:-11]+".prob";
+      else:
+         fn = options.probDir +"/"+fn+".prob";
+      try:
+         inF = open(fn);
+      except:
+         sys.exit("Unable to open file: "+fn);
+      print "Reading file: ",fn;
+      Nmap = 0;
+      for line in inF:
+         if line[0]!="#": break;
+         ls=line.split();
+         for i in xrange(len(ls)-1): 
+            if ls[i] == "Nmap": Nmap = int(ls[i+1]);
+      inF.close();
+      if Nmap <= 0:
+         sys.exit("Unable to find valid Nmap in: "+fn);
+      N.append(Nmap);
+
+
+means = [np.transpose(np.loadtxt(arg))[1] for arg in args];
+print "Files:";
+for j in xrange(len(args)):
+   print "  ",args[j],N[j];
+
+try:
+   outF = open(options.out,"w");
+except:
+   sys.exit("Unable to open output file: ",options.out);
+
+for i in xrange(len(means[0])):
+   for j in xrange(len(means)):
+      outF.write(str(long(round(means[j][i]*N[j])))+" ");
+   outF.write("\n");
+
+outF.close();
+
+
diff --git a/getFoldChange.cpp b/getFoldChange.cpp
new file mode 100644
index 0000000..7f1a345
--- /dev/null
+++ b/getFoldChange.cpp
@@ -0,0 +1,112 @@
+/*
+ *
+ * Compute Fold Change between expression samples.
+ *
+ *
+ */
+#include <cmath>
+#include <iostream>
+
+using namespace std;
+
+#include "PosteriorSamples.h"
+#include "ArgumentParser.h"
+#include "common.h"
+
+
+int main(int argc,char* argv[]){
+   string programDescription=
+"Computes log_2 Fold Change from MCMC expression samples.\n\
+   [sampleFiles] should contain transposed MCMC samples from replicates.\n\
+                  (use --log option if they are not logged)";   
+   // Set options {{{
+   ArgumentParser args(programDescription,"[sampleFiles]",1);
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   args.addOptionB("l","log","log",0,"Use logged values.");
+//   args.addOptionS("t","type","type",0,"Type of variance, possible values: [sample,sqDif] for sample variance or sqared difference.","sample");
+   if(!args.parse(argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // }}}
+   bool doLog=args.flag("log");
+   if(doLog){
+      if(args.verbose)cout<<"Will log expression samples to produce log_2 Fold Chnage."<<endl;
+   }else{
+      if(args.verbose)cout<<"Assuming samples are logged, producing log_2 Fold Change."<<endl;
+   }
+   
+   long i,j,r,N,RN,M=0,C;
+
+   Conditions cond;
+   if(! (cond.init("NONE", args.args(), &C, &M, &N))){
+      cerr<<"ERROR: Main: Failed loading MCMC samples."<<endl;
+      return 0;
+   }
+   RN=cond.getRN();  
+   if((RN>2)&&(C!=2)){//{{{
+      cout<<"Please specify exactly 2 conditions when using more than two sample files.\n";
+      cout<<"  such as: [sample Files from first condition] C [sample files from second condition]"<<endl;
+      return 0;
+   }//}}}
+   if(args.verbose)cout<<"Samples: "<<N<<" transcripts: "<<M<<endl;
+   
+   ofstream outFile(args.getS("outFileName").c_str());
+   if(! outFile.is_open()){
+      cerr<<"ERROR: Main: File write failed!"<<endl;
+      return 0;
+   }
+   outFile<<"# log_2 Fold Change in expression."<<endl;
+   outFile<<"# files: ";
+   for(r=0;r<2;r++)outFile<<args.args()[r]<<" ";
+   outFile<<endl;
+   outFile<<"# T (M rows,N cols)"<<endl; 
+   outFile<<"# M "<<M<<endl;
+   outFile<<"# N "<<N<<endl;
+   vector<double> tr,tr2,res(N); 
+   double l2=log(2.0);
+   long RC;
+   for(j=0;j<M;j++){
+      if(args.verbose)progressLog(j,M);
+      if(RN==2){
+         if(cond.getTranscript(0,j,tr)&&cond.getTranscript(1,j,tr2)){
+            for(i=0;i<N;i++){
+               if(doLog)outFile<<log(tr2[i]/tr[i])/l2;
+               outFile<<(tr2[i]-tr[i])/l2<<" ";
+            }
+            outFile<<endl;
+         }else{
+            cerr<<"Failed loading "<<j<<" transcript."<<endl;
+         }
+      }else{
+         // Comparing arithmetic means of log samples which are geometric means of samples
+         res.assign(N,0);
+         RC = cond.getRC(1);
+         for(r=0;r< RC;r++){
+            if(cond.getTranscript(1,r,j,tr)){
+               for(i=0;i<N;i++)
+                  if(doLog)res[i]+=log(tr[i])/RC;
+                  else res[i]+=tr[i]/RC;
+            }else{
+               cerr<<"Failed loading "<<j<<" transcript from condition 1 replicate "<<r<<endl;
+            }
+         }
+         RC = cond.getRC(0);
+         for(r=0;r<RC;r++){
+            if(cond.getTranscript(0,r,j,tr)){
+               for(i=0;i<N;i++)
+                  if(doLog)res[i]-=log(tr[i])/RC;
+                  else res[i]-=tr[i]/RC;
+            }else{
+               cerr<<"Failed loading "<<j<<" transcript from condition 0 replicate "<<r<<endl;
+            }
+         }
+         for(i=0;i<N;i++)
+            outFile<<res[i]/l2<<" ";
+         outFile<<endl;
+      }
+   }
+   cond.close();
+   
+   outFile.close();
+   if(args.verbose)cout<<"DONE"<<endl;
+   return 0;
+}
diff --git a/getGeneExpression.cpp b/getGeneExpression.cpp
new file mode 100644
index 0000000..80fb989
--- /dev/null
+++ b/getGeneExpression.cpp
@@ -0,0 +1,120 @@
+/*
+ *
+ * Produce overall gene expression
+ *
+ */
+#include<cmath>
+
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "misc.h"
+#include "PosteriorSamples.h"
+#include "TranscriptInfo.h"
+
+#include "common.h"
+
+extern "C" int getGeneExpression(int *argc,char* argv[]){
+   string programDescription=
+"Computes expression of whole genes.\n\
+   [samplesFile] should contain transposed MCMC samples which will be transformed into gene expression samples.";   
+   // Set options {{{
+   ArgumentParser args(programDescription,"[samplesFile]",1);
+   args.addOptionS("t","trInfoFile","trInfoFileName",1,"Name of the transcript file.");
+   args.addOptionB("a","adjustByLength","adjust",0,"Adjust expression by transcripts length.");
+   args.addOptionB("","theta2rpkm","rpkm",0,"Transform transcript expression in theta to gene expression in RPKM.");
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   args.addOptionB("l","log","log",0,"Output logged values.");
+   args.addOptionS("T","trMap","trMapFile",0,"Name of the file containing transcript to gene mapping.");
+   args.addOptionS("G","geneList","geneListFile",0,"Name of the file containing list of gene names (one for each transcript).");
+   args.addOptionB("","updateTrFile","updateTrFile",0,"Update trInfoFile if new gene names were provided (with trMapFile or geneListFile).");
+   args.addOptionS("g","geneInfoFile","geneInfoFile",0,"Name of while to which gene information will be saved.");
+   if(!args.parse(*argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // }}}
+   bool doLog,doAdjust=args.flag("adjust")||args.flag("rpkm"),doRPKM=args.flag("rpkm");
+   doLog = ns_genes::getLog(args);
+   
+   long N=0,M=0,G;
+   TranscriptInfo trInfo;
+   PosteriorSamples  samples;
+   if(!ns_genes::prepareInput(args, &trInfo, &samples, &M, &N, &G))return 1;
+   if(!ns_genes::updateGenes(args, &trInfo, &G))return 1;
+   if(args.verb())messageF("Genes: %ld\n",G);
+   if(!ns_genes::checkGeneCount(G,M))return 1;
+   if(args.flag("updateTrFile") && (args.isSet("trMapFile") || args.isSet("geneListFile"))){
+      if(args.verb())message("Updating transcript info file with new gene names.\n");
+      if(!trInfo.writeInfo(args.getS("trInfoFileName"), true)){
+         if(args.verb())warning("Main: Updating trInfoFile failed.\n");
+      }
+   }
+   if(args.isSet("geneInfoFile")){
+      if(args.verb())message("Saving gene information into: %s.\n",args.getS("geneInfoFile").c_str());
+      if(!trInfo.writeGeneInfo(args.getS("geneInfoFile"))){
+         warning("Main: Writing gene information failed.\n");
+      }
+   }
+
+   ofstream outFile;
+   if(!ns_misc::openOutput(args, &outFile))return 1;;
+   // Write ouput header {{{
+   outFile<<"# from: "<<args.args()[0]<<"\n# samples of gene expression\n";
+   if(args.verbose)message("Genes will be ordered as they first appear in %s.\n",(args.getS("trInfoFileName")).c_str());
+   outFile<<"# Genes will be ordered as they first appear in "<<args.getS("trInfoFileName")<<"\n";
+   if(doRPKM)outFile<<"# data in RPKM\n";
+   if(doLog)outFile<<"# L \n";
+   outFile<<"# T (M rows,N cols)\n";
+   outFile<<"# G = M "<<G<<"\n# N "<<N<<endl;
+   // Set precision.
+   outFile.precision(9);
+   outFile<<scientific;
+   // }}}
+   vector< vector<double> > trs;
+   vector<long double> normals(N,0);
+   long double sum;
+   long i,j,g,gM,m;
+   if(doAdjust){
+      vector<double> tr(M);
+      if(args.verbose)message("Computing normalization constants, because of length adjustment.\n");
+      for(j=0;j<M;j++){
+         if(args.verbose)progressLog(j,M);
+         samples.getTranscript(j,tr);
+         for(i=0;i<N;i++)
+            normals[i] += tr[i]/trInfo.L(j);
+      }
+   }
+   if(args.verbose)message("Computing gene expression.\n");
+   for(g=0;g<G;g++){
+      if(args.verbose)progressLog(g,G);
+      gM = trInfo.getGtrs(g).size();
+      if((long)trs.size()<gM)trs.resize(gM);
+      for(j=0;j<gM;j++){
+         m = trInfo.getGtrs(g)[j];
+         samples.getTranscript( m , trs[j]);
+      }
+      for(i=0;i<N;i++){
+         sum = 0;
+         for(j=0;j<gM;j++){
+            if(doAdjust&&(normals[i]>0)){
+               m = trInfo.getGtrs(g)[j];
+               sum+=(trs[j][i] / trInfo.L(m)) / normals[i];
+            }else{
+               sum+=trs[j][i];
+            }
+         }
+         if(doRPKM)sum=sum*10e9;
+         if(doLog)sum=log(sum);
+         outFile<<sum<<" ";
+      }
+      outFile<<endl;
+   }
+   outFile.close();
+   if(args.verbose)message("DONE\n");
+   return 0;
+}
+
#ifndef BIOC_BUILD
// Standalone command-line entry point; excluded from the Bioconductor (R
// package) build, where getGeneExpression() is invoked directly instead.
int main(int argc,char* argv[]){
   return getGeneExpression(&argc,argv);
}
#endif
diff --git a/getPPLR.cpp b/getPPLR.cpp
new file mode 100644
index 0000000..2fb6d95
--- /dev/null
+++ b/getPPLR.cpp
@@ -0,0 +1,152 @@
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "common.h"
+#include "misc.h"
+#include "PosteriorSamples.h"
+
+int main(int argc,char* argv[]){
+   string programDescription=
+"Computes PPLR from MCMC expression samples.\n"
+"   (the probability of second condition being up-regulated)\n"
+"   Also computes log2 fold change with confidence intervals, and condition mean log expression.\n"
+"   [sampleFiles] should contain transposed MCMC samples from different conditions.";
+   // Set options {{{
+   ArgumentParser args(programDescription,"[sampleFile-C1] [sampleFile-C1]",1);
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   args.addOptionB("","inputIsLogged","logged",0,"Indicate that the input expression estimates are on log scale. (Not necessary to use with data generated by BitSeq-0.5.0 and above.)");
+   args.addOptionB("d","distribution","distribution",0,"Produce whole distribution of differences.");
+   args.addOptionS("s","selectFile","selectFileName",0,"File containing list of selected transcript IDs (zero based), only these will be reported. Only works with --distribution option.");
+   args.addOptionD("","subSample","subSample",0,"Sub-sample the distributions using a given fraction of expression samples.",1.0);
+   if(!args.parse(argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // }}}
+
+   long i,m,N,M;
+   bool getAll=false, doLog = true;
+   vector<long> trSelect;
+   if(! args.isSet("selectFileName")){
+      getAll=true;
+   }else{
+      ifstream selectF (args.getS("selectFileName").c_str());
+      if(! selectF.is_open()){
+         cerr<<"ERROR: Main: Failed loading selected transcripts."<<endl;
+         return 1;
+      }
+      selectF>>m;
+      while(selectF.good()){
+         trSelect.push_back(m);
+         selectF>>m;
+      }
+      selectF.close();
+      sort(trSelect.begin(),trSelect.end());
+   }
+
+   Conditions cond;
+   if(! cond.init("NONE", args.args(), &M, &N)){
+      cerr<<"ERROR: Main: Failed loading conditions."<<endl;
+      return 1;
+   }
+   if(cond.logged() || args.flag("logged")) {
+      doLog = false;
+      if(args.verbose)cout<<"Assuming values are logged already."<<endl;
+   }else {
+      doLog = true;
+      if(args.verbose)cout<<"Will use logged values."<<endl;
+   }
+   if(args.verbose)cout<<"M "<<M<<"   N "<<N<<endl;
+   ofstream outFile(args.getS("outFileName").c_str());
+   if(! outFile.is_open()){
+      cerr<<"ERROR: Main: File write probably failed!"<<endl;
+      return 1;
+   }
+   if(getAll){
+      trSelect.resize(M);
+      for(i=0;i<M;i++)trSelect[i]=i;
+   }
+   
+   vector<vector<double> > tr(2);
+   vector<double> difs;
+   long subN = N;
+   double frac = args.getD("subSample");
+   if((frac > 0) && (frac < 1))subN = (long)(N * frac);
+   if(subN<1){
+      cerr<<"ERROR: The fraction of samples for sub-sampling is too small."<<endl;
+      return 1;
+   }
+   if((args.getD("subSample")!=1) && args.verbose){
+      cout<<"Using "<<subN<<" samples for sub-sampling."<<endl;
+   }
+   double pplr,mu_0,mu_1,log2FC,ciLow,ciHigh;
+   if(! args.flag("distribution")){
+      if(args.verbose)cout<<"Counting PPLR"<<endl;
+      outFile<<"# Computed PPLR, log2 fold change with 95\% confidence intervals, condition mean log expression."<<endl;
+      outFile<<"# M "<<M<<"\n# columns:"<<endl;
+      outFile<<"# PPLR log2FoldChange ConfidenceLow ConfidenceHigh MeanLogExpressionC1 MeanLogExpressionC2"<<endl;
+      for(m=0;m<M;m++){
+         if(args.verbose)progressLog(m,M);
+         cond.getTranscript(0,m,tr[0],subN);
+         cond.getTranscript(1,m,tr[1],subN);
+         difs.resize(subN);
+         pplr = log2FC = mu_0 = mu_1 = 0;
+         for(i=0;i<subN;i++){
+            if(doLog){
+               if((tr[0][i] <= 0) || (tr[1][i] <= 0)){
+                  cerr<<"ERROR: Found non-positive expression (transcript: "<<m<<").\n"
+                        "       The expression is probably in log scale already.\n"
+                        "       Please check your data and use --inputIsLogged if that is the case."
+                      <<endl;
+                  return 1;
+               }
+               tr[1][i] = log(tr[1][i]);
+               tr[0][i] = log(tr[0][i]);
+            }
+            if(tr[1][i]>tr[0][i])pplr+=1;
+            difs[i]=tr[1][i]-tr[0][i];
+            log2FC+=tr[1][i]-tr[0][i];
+            mu_0 += tr[0][i];
+            mu_1 += tr[1][i];
+         }
+         pplr /= subN;
+         mu_0 /= subN;
+         mu_1 /= subN;
+         log2FC /= subN*log(2);
+         ns_misc::computeCI(95, &difs, &ciLow, &ciHigh);
+         ciLow /= log(2);
+         ciHigh /= log(2);
+         outFile<<pplr<<" "<<log2FC<<" "<<ciLow<<" "<<ciHigh<<" "<<mu_0<<" "<<mu_1<<endl;
+      }
+   }else{
+      if(args.verbose)cout<<"Computing Log Ratio distribution"<<endl;
+      long selectM = trSelect.size();
+      outFile<<"# Log Ratio distribution"<<endl;
+      outFile<<"# T "<<endl;
+      outFile<<"# M "<<selectM<<endl;
+      outFile<<"# N "<<subN<<endl;
+      outFile<<"# first column - transcript number (zero based)"<<endl;
+      for(m=0;m<selectM;m++){
+         if(selectM>10)progressLog(m,M);
+         cond.getTranscript(0,trSelect[m],tr[0],subN);
+         cond.getTranscript(1,trSelect[m],tr[1],subN);
+         outFile<<trSelect[m]<<" ";
+         for(i=0;i<subN;i++){
+            if(doLog){
+               tr[1][i] = log(tr[1][i]);
+               tr[0][i] = log(tr[0][i]);
+            }
+            outFile<<tr[1][i]-tr[0][i]<<" ";
+         }
+         outFile<<endl;
+      }
+   }
+   outFile.close();
+   cond.close();
+   return 0;
+}
diff --git a/getVariance.cpp b/getVariance.cpp
new file mode 100644
index 0000000..e2bdba2
--- /dev/null
+++ b/getVariance.cpp
@@ -0,0 +1,167 @@
+/*
+ *
+ * Compute posterior variance of samples.
+ *
+ *
+ */
+#include<cmath>
+
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "misc.h"
+#include "PosteriorSamples.h"
+
+#include "common.h"
+
+// getVariance tool entry point.
+// Loads transposed MCMC expression samples from one or more replicate files
+// and writes, per transcript, its mean expression and a variance estimate:
+//   type "sample" - pooled sample variance across all replicates' samples;
+//   type "sqDif"  - half of the mean squared difference between two sample
+//                   sets (two files, or the two halves of a single file).
+// Returns 0 on success, 1 on failure.
+extern "C" int getVariance(int *argc,char* argv[]){
+   string programDescription=
+"Estimates variance of MCMC samples from 1 or multiple replicates.\n\
+   [sample Files] should contain transposed MCMC samples from replicates.";
+   // Set options {{{
+   ArgumentParser args(programDescription,"[sampleFiles]",1);
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   args.addOptionB("l","log","log",0,"Use logged values.");
+   args.addOptionS("t","type","type",0,"Type of variance, possible values: [sample,sqDif] for sample variance or squared difference.","sample");
+   args.addOptionS("","norm","normalization",0,"Normalization constants for each input file provided as comma separated list of doubles (e.g. 1.0017,1.0,0.9999 ).");
+   if(!args.parse(*argc,argv)){return 0;}
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // doLog - whether we still have to log-transform the samples ourselves;
+   // logged - whether the reported values end up on the log scale.
+   bool doLog=args.flag("log"),logged=false;
+   // }}}
+
+   long i,j,r,N,RN,M=0;
+
+   Conditions cond;
+   if(! (cond.init("NONE", args.args(), &M, &N))){
+      error("Main: Failed loading MCMC samples.\n");
+      return 1;
+   }
+   if(doLog){
+      logged = true;
+      // If the input files already contain logged samples, skip our own
+      // log transform.
+      if(cond.logged()){
+         doLog=false;
+         if(args.verbose)message("Samples are already logged, computing mean.\n");
+      }else{
+         if(args.verbose)message("Using logged values.\n");
+      }
+   }else{
+      if(args.verbose)message("NOT using logged values.\n");
+      if(cond.logged())logged=true;
+   }
+   // Optional per-file normalisation constants.
+   if(args.isSet("normalization")){
+      if(! cond.setNorm(args.getTokenizedS2D("normalization"))){
+         error("Main: Applying normalization constants failed.\n");
+         return 1;
+      }
+   }
+   RN=cond.getRN();
+   // sqDif uses only the first two sample files; when more were supplied,
+   // report which two are used (skipping "C" condition separators).
+   if((args.getS("type")=="sqDif")&&(RN>2)&&(args.verbose)){//{{{
+      i=0;
+      while(args.args()[i]=="C")i++;
+      message("using only: %s ",(args.args()[i]).c_str());
+      i++;
+      while(args.args()[i]=="C")i++;
+      message("%s\n",(args.args()[i]).c_str());
+   }//}}}
+   if(args.verbose)message("replicates: %ld samples: %ld transcripts: %ld\n",RN,N,M);
+
+   ofstream outFile(args.getS("outFileName").c_str());
+   if(! outFile.is_open()){
+      error("Main: File write failed!\n");
+      return 1;
+   }
+   vector<double> mean(M),var(M);
+   vector<double> tr,tr2;
+   double m,mSq,count,sqDif;
+   bool good=true;
+   if(args.getS("type")=="sample"){ //{{{
+      // Pooled sample variance: accumulate sum and sum of squares over all
+      // replicates, then var = E[x^2] - E[x]^2.
+      for(j=0;j<M;j++){
+         if((j%10000==0)&&(j>0)&&args.verbose)message("%ld\n",j);
+
+         m = mSq = count = 0;
+         for(r=0;r<RN;r++){
+            if(cond.getTranscript(r,j,tr,N/RN)){
+               for(i=0;i<N/RN;i++){
+                  if(doLog){
+                     // Guard log(0) with a large negative constant.
+                     tr[i]=tr[i]<=0?ns_misc::LOG_ZERO:log(tr[i]);
+                  }
+                  m+=tr[i];
+                  mSq += tr[i]*tr[i];
+               }
+               count+=N/RN;
+            }else{
+               warning("Error at %ld %ld\n",j,r);
+            }
+         }
+         if(count==0){
+            warning("no samples for transcript: %ld.\n",j);
+            // Flag the transcript with the sentinel -47; written as "NaN 0".
+            mean[j] = -47;
+            var[j] = -47;
+         }else{
+            mean[j] = m / count;
+            var[j] = mSq/count - m*m/(count*count);
+         }
+      }//}}}
+   }else{ // "sqDif" {{{
+      // Squared-difference variance: pair up samples from two sets and use
+      // half of the mean squared difference as the variance estimate.
+      for(j=0;j<M;j++){
+         if((j%10000==0)&&(j>0)&&args.verbose)message("%ld\n",j);
+         m = sqDif = 0;
+         // BUGFIX: reset the flag for every transcript.  Previously a single
+         // failed read left good==false forever, so all following transcripts
+         // were silently skipped and reported as "0 0" instead of their
+         // actual values (or the NaN sentinel).
+         good = true;
+         if(RN==1){
+            // Single input file: split its N samples into two halves.
+            if(! cond.getTranscript(0,j,tr,N)){
+               mean[j] = -47;
+               var[j] = -47;
+               good=false;
+               continue;
+            }
+            tr2.resize(N/2);
+            for(i=0;i<N/2;i++)
+               tr2[i]=tr[i+N/2];
+         }else{
+            // Two (or more) input files: use the first two sample sets.
+            if(! (cond.getTranscript(0,j,tr,N/2)&&
+                  cond.getTranscript(1,j,tr2,N/2))){
+               mean[j] = -47;
+               var[j] = -47;
+               good=false;
+               continue;
+            }
+         }
+         if(good){
+            for(i=0;i<N/2;i++){
+               if(doLog){
+                  tr[i]=tr[i]<=0?ns_misc::LOG_ZERO:log(tr[i]);
+                  tr2[i]=tr2[i]<=0?ns_misc::LOG_ZERO:log(tr2[i]);
+               }
+               m+=tr[i]+tr2[i];
+               sqDif+=(tr[i]-tr2[i])*(tr[i]-tr2[i]);
+            }
+            mean[j] = m / N;
+            var[j] = sqDif / N; // == ( sqDif / (N/2) ) / 2
+         }
+      }
+   } //}}}
+   cond.close();
+
+   // Write header plus one "<mean> <variance>" line per transcript.
+   outFile<<"# Transcripts mean expression and "<<args.getS("type")<<" variance."<<endl;
+   outFile<<"# files: ";
+   for(r=0;r<RN;r++)outFile<<args.args()[r]<<" ";
+   outFile<<endl;
+   if(logged)outFile<<"# L -> values logged"<<endl;
+   outFile<<"# M "<<M<<endl;
+   (outFile<<scientific).precision(9);
+   for(i=0;i<M;i++){
+      // Transcripts flagged with the -47 sentinel had no usable samples.
+      if((mean[i]==-47)&&(var[i]==-47))outFile<<"NaN 0 "<<endl;
+      else outFile<<mean[i]<<" "<<var[i]<<endl;
+   }
+   outFile.close();
+   if(args.verbose)message("DONE\n");
+   return 0;
+}
+
+
+// Standalone build: plain main() that forwards to the library entry point.
+#ifndef BIOC_BUILD
+int main(int argc, char* argv[]){
+   const int status = getVariance(&argc, argv);
+   return status;
+}
+#endif
diff --git a/getWithinGeneExpression.cpp b/getWithinGeneExpression.cpp
new file mode 100644
index 0000000..ca7846f
--- /dev/null
+++ b/getWithinGeneExpression.cpp
@@ -0,0 +1,248 @@
+/*
+ *
+ * Produce relative expression within gene
+ *
+ */
+#include<cmath>
+
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "misc.h"
+#include "PosteriorSamples.h"
+#include "TranscriptInfo.h"
+
+#include "common.h"
+
+namespace ns_withinGene {
+
+// Read transcripts of gene g.
+void readTranscripts(long g, const TranscriptInfo &trInfo, PosteriorSamples *samples, long *gM, vector< vector<double> > *trs);
+
+// Adjust expression samples by transcript length.
+void adjustExpression(long g, const TranscriptInfo &trInfo, vector< vector<double> > *trs);
+
+// Compute sum of samples of transcripts from one gene.
+void getSum(long gM, long N, const vector< vector<double> > &trs, vector<double> *sum);
+
+// Update 'mean' and squareSum with new value.
+void updateSummaries(double x, long double *mean, long double *sqSum, double norm = 1, bool doLog = false);
+
+// Append samples of a transcript into output file.
+void writeTr(long N, const vector<double> &tr, ofstream *outFile);
+
+} // namespace ns_withinGene
+
+// getWithinGeneExpression tool entry point.
+// For every transcript, turns its MCMC expression samples into expression
+// relative to the total expression of its gene (optionally length-adjusted
+// and/or logged).  Writes the individual relative samples into --outFile
+// and/or per-transcript summaries (mean, variance, relative mean, relative
+// variance) into --sumFile.  Returns 0 on success, 1 on failure.
+extern "C" int getWithinGeneExpression(int *argc,char* argv[]){
+   string programDescription=
+"Computes relative expression of transcripts within genes.\n\
+   [samplesFile] should contain transposed MCMC expression samples.\n\
+   program can produce means and variance and write them into [sumFile]\n\
+   or individual MCMC samples which are written into [outFile].";   
+   // Set options {{{
+   ArgumentParser args(programDescription,"[samplesFile]",1);
+   args.addOptionS("t","trInfoFile","trInfoFileName",1,"Name of the transcript file.");
+   args.addOptionB("a","adjustByLength","adjust",0,"Adjust expression by transcripts length.");
+   args.addOptionS("o","outFile","outFileName",0,"Name of the output file.");
+   args.addOptionS("s","sumFile","sumFileName",0,"Name of summarization file where true mean, true variance and relative mean and relative variance are saved.");
+   args.addOptionB("l","log","log",0,"Use logged values.");
+   args.addOptionS("T","trMap","trMapFile",0,"Name of the file containing transcript to gene mapping.");
+   args.addOptionS("G","geneList","geneListFile",0,"Name of the file containing list of gene names (one for each transcript).");
+   args.addOptionB("","groupByGene","groupByGene",0,"Group transcripts by genes (this can change the default order of output.");
+   args.addOptionB("","updateTrFile","updateTrFile",0,"Update trInfoFile if new gene names were provided (with trMapFile or geneListFile).");
+   if(!args.parse(*argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   // }}}
+   // At least one of the two output files must be requested.
+   bool doLog,doOut=args.isSet("outFileName"),doSummaries=args.isSet("sumFileName");
+   if(! (doOut || doSummaries)){
+      error("Main: Have to specify at least one of --outFile/--sumFile.\n");
+      return 1;
+   }
+   doLog = ns_genes::getLog(args);
+   
+   long N=0,M=0,G;
+   TranscriptInfo trInfo;
+   PosteriorSamples  samples;
+   // Load transcript info and samples: M transcripts, N samples, G genes.
+   if(!ns_genes::prepareInput(args, &trInfo, &samples, &M, &N, &G))return 1;
+   if(!ns_genes::updateGenes(args, &trInfo, &G))return 1;
+   if(args.verb())messageF("Genes: %ld\n",G);
+   if(!ns_genes::checkGeneCount(G,M))return 1;
+   // Optionally persist newly assigned gene names back into the trInfoFile.
+   if(args.flag("updateTrFile") && (args.isSet("trMapFile") || args.isSet("geneListFile"))){
+      if(args.verb())message("Updating transcript info file with new gene names.\n");
+      if(!trInfo.writeInfo(args.getS("trInfoFileName"), true)){
+         if(args.verb())warning("Main: Updating trInfoFile failed.\n");
+      }
+   }
+
+   ofstream outFile,sumFile;
+   if(doOut){
+      if(!ns_misc::openOutput(args, &outFile))return 1;;
+      // Write output header {{{
+      outFile<<"# from: "<<args.args()[0]<<"\n# samples of within gene expression\n";
+      if(! trInfo.genesOrdered()){
+         if(args.flag("groupByGene")){
+            warning("Main: Transcripts in output file will be reordered and grouped by genes.\n");
+            outFile<<"# WARNING: transcripts in output file are reordered and grouped by genes.\n";
+         }else{
+            warning("Main: Transcripts are not grouped by genes.\n"
+                    "   The transcript order will be kept the same but computation will be slower.\n");
+         }
+      }
+      if(doLog)outFile<<"# L \n";
+      outFile<<"# T (M rows,N cols)\n";
+      outFile<<"# M "<<M<<"\n# N "<<N<<endl;
+      // Set precision.
+      (outFile<<scientific).precision(9);
+      // }}}
+   }
+   if(doSummaries){
+      if(!ns_misc::openOutput(args.getS("sumFileName"), &sumFile))return 1;
+      sumFile<<"# from: "<<args.args()[0]<<"\n# <mean> <variance> <mean of within gene expression>  <variance of within gene expression>\n# M "<<M<<endl;
+      // Set precision.
+      (sumFile<<scientific).precision(9);
+   }
+   // Running sums for absolute (mean,sqSum) and relative (mean2,sqSum2)
+   // per-transcript summaries.
+   vector<long double> mean(M,0),mean2(M,0),sqSum(M,0),sqSum2(M,0);
+   vector< vector<double> > trs;
+   vector<double> sum;
+   // Normalisation constants are 1 by default, or equivalently 0 in the log case.
+   vector<double> normals(N,(int)(!doLog));
+   long i,j,g,gM,m;
+   if(args.flag("adjust")&&(doSummaries)){
+      // 'normals' are only precomputed so that non-relative mean and variance are computed from
+      // length adjusted and normalised expression.
+      vector<double> tr(M);
+      if(args.verbose)message("Computing normalization constants, because of length adjustment.\n");
+      normals.assign(N,0);
+      for(j=0;j<M;j++){
+         if(args.verbose)progressLog(j,M);
+         samples.getTranscript(j,tr);
+         for(i=0;i<N;i++)
+            normals[i] += tr[i]/trInfo.L(j);
+      }
+      if(doLog)for(i=0;i<N;i++)normals[i] = 
+            (normals[i] != 0) ? log(normals[i]) : ns_misc::LOG_ZERO;
+   }
+   if(args.verbose)message("Computing within gene relative expression.\n");
+   // Sentinel gene id: guarantees a cache miss on the first transcript.
+   g = -2;
+   if(!args.flag("groupByGene")){
+      long curJ=0;
+      // Here we iterate over transcripts:
+      //  For each transript: load all transcripts of a gene of current transcripts
+      //  If gene is same as for previous, then just reuse information
+      for(m=0;m<M;m++){
+         if(args.verbose)progressLog(m,M);
+         if(trInfo.geId(m) == g){
+            // Same gene as before: just locate this transcript's row in trs.
+            for(j=0;j<gM;j++)if(trInfo.getGtrs(g)[j] == m){curJ = j; break;}
+         }else{
+            g = trInfo.geId(m);
+            ns_withinGene::readTranscripts(g, trInfo, &samples, &gM, &trs);
+            curJ = 0;
+            for(j=0;j<gM;j++)if(trInfo.getGtrs(g)[j] == m){curJ = j; break;}
+            if(args.flag("adjust"))ns_withinGene::adjustExpression(g, trInfo, &trs);
+            // Per-sample gene totals, computed from the untransformed values.
+            ns_withinGene::getSum(gM, N, trs, &sum);
+         }
+         // Each cached row of trs is transformed in place exactly once:
+         // when its transcript becomes the current one.
+         for(i=0;i<N;i++){
+            if(doLog)trs[curJ][i] = log(trs[curJ][i]);
+            if(doSummaries) ns_withinGene::updateSummaries(trs[curJ][i], &(mean[m]), &(sqSum[m]), normals[i], doLog);
+            if(doLog)trs[curJ][i] -= log(sum[i]);
+            else trs[curJ][i] /= sum[i];
+            if(doSummaries) ns_withinGene::updateSummaries(trs[curJ][i], &mean2[m], &sqSum2[m]);
+         }
+         if(doOut){
+            ns_withinGene::writeTr(N, trs[curJ], &outFile);
+         }
+      }
+   }else{
+      // Here we iterate over genes:
+      //  Calculate values for all their transcripts
+      //  Write all transcripts of current gene
+      for(g=0;g<G;g++){
+         if(args.verbose)progressLog(g,G);
+         ns_withinGene::readTranscripts(g, trInfo, &samples, &gM, &trs);
+         if(args.flag("adjust"))ns_withinGene::adjustExpression(g, trInfo, &trs);
+         ns_withinGene::getSum(gM, N, trs, &sum);
+         for(i=0;i<N;i++){
+            for(j=0;j<gM;j++){
+               m = trInfo.getGtrs(g)[j];
+               if(doLog)trs[j][i] = log(trs[j][i]);
+               if(doSummaries) ns_withinGene::updateSummaries(trs[j][i], &mean[m], &sqSum[m], normals[i], doLog);
+               if(doLog)trs[j][i] -= log(sum[i]);
+               else trs[j][i] /= sum[i];
+               if(doSummaries) ns_withinGene::updateSummaries(trs[j][i], &mean2[m], &sqSum2[m]);
+            }
+         }
+         if(doOut){
+            for(j=0;j<gM;j++){
+               ns_withinGene::writeTr(N, trs[j], &outFile);
+            }
+         }
+      }
+   }
+   if(doOut)outFile.close();
+   if(doSummaries){
+      // Convert the running sums into means and (biased) variances.
+      long double var,var2;
+      for(m=0;m<M;m++){
+         mean[m] /= N;
+         var = sqSum[m]/N - mean[m]*mean[m];
+         mean2[m] /= N;
+         var2 = sqSum2[m]/N - mean2[m]*mean2[m];
+         sumFile<<mean[m]<<" "<<var<<" "<<mean2[m]<<" "<<var2<<endl;
+      }
+      sumFile.close();
+   }
+   if(args.verbose)message("DONE\n");
+   return 0;
+}
+
+
+// Standalone build: plain main() that forwards to the library entry point.
+#ifndef BIOC_BUILD
+int main(int argc, char* argv[]){
+   const int status = getWithinGeneExpression(&argc, argv);
+   return status;
+}
+#endif
+
+
+namespace ns_withinGene {
+
+// Load expression samples of all transcripts of gene g into *trs.
+// Sets *gM to the gene's transcript count; trs is only grown, never shrunk,
+// so its buffers can be reused across genes.
+void readTranscripts(long g, const TranscriptInfo &trInfo, PosteriorSamples *samples, long *gM, vector< vector<double> > *trs){//{{{
+   *gM = trInfo.getGtrs(g).size();
+   if((long)trs->size() < *gM)trs->resize(*gM);
+   for(long j = 0; j < *gM; j++){
+      samples->getTranscript( trInfo.getGtrs(g)[j] , (*trs)[j]);
+   }
+}// }}}
+
+// Divide every sample of every transcript of gene g by the transcript's
+// length, turning expression into length-adjusted (per-base) expression.
+void adjustExpression(long g, const TranscriptInfo &trInfo, vector< vector<double> > *trs){//{{{
+   long N,gM = trInfo.getGtrs(g).size();
+   double l;
+   for(long j=0; j<gM; j++){
+      l = trInfo.L(trInfo.getGtrs(g)[j]);
+      N = (*trs)[j].size();
+      for(long n=0; n<N; n++){
+         (*trs)[j][n] /= l;
+      }
+   }
+}// }}}
+
+// For each of the N samples compute the sum over the gene's gM transcripts.
+void getSum(long gM, long N, const vector< vector<double> > &trs, vector<double> *sum){//{{{
+   sum->assign(N,0);
+   for(long j=0; j<gM; j++)
+      for(long n=0; n<N; n++)(*sum)[n] += trs[j][n];
+}// }}}
+
+// Normalise x (subtract norm in log space, divide otherwise; norm==0 means
+// "no normalisation") and accumulate it into the running mean and square sum.
+void updateSummaries(double x, long double *mean, long double *sqSum, double norm, bool doLog){//{{{
+   if(doLog) x -= norm;
+   else x = (norm != 0) ? x/norm : x;
+
+   *mean += x;
+   *sqSum += x*x;
+}// }}}
+
+// Write N space-separated samples of one transcript as a single output line.
+void writeTr(long N, const vector<double> &tr, ofstream *outFile){//{{{
+   // BUGFIX: guard against an empty sample vector -- the original code
+   // unconditionally accessed tr[N-1], which is out of bounds for N<=0.
+   if(N <= 0){ (*outFile)<<endl; return; }
+   for(long n=0; n<N-1; n++)
+      (*outFile)<<tr[n]<<" ";
+   (*outFile)<<tr[N-1]<<endl;
+}// }}}
+
+} // namespace ns_withinGene
+
diff --git a/lowess.cpp b/lowess.cpp
new file mode 100644
index 0000000..efdb215
--- /dev/null
+++ b/lowess.cpp
@@ -0,0 +1,511 @@
+/*
+ *  c++ implementation of Lowess weighted regression by 
+ *  Peter Glaus http://www.cs.man.ac.uk/~glausp/
+ *
+ *
+ *  Based on fortran code by Cleveland downloaded from:
+ *  http://netlib.org/go/lowess.f
+ *  original author:
+* wsc at research.bell-labs.com Mon Dec 30 16:55 EST 1985
+* W. S. Cleveland
+* Bell Laboratories
+* Murray Hill NJ 07974
+ *  
+ *  See original documentation below the code for details.
+ * 
+ */
+#include<algorithm>
+#include<cmath>
+#include<fstream>
+
+using namespace std;
+
+#include "lowess.h"
+#include "common.h"
+
+// Convenience overload: run lowess with delta=0 and discard the robustness
+// weights and residuals.
+void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, vector<double> &ys){//{{{
+   vector<double> robustnessWeights;
+   vector<double> residuals;
+   lowess(x, y, f, nsteps, 0.0, ys, robustnessWeights, residuals);
+}//}}}
+// Robust locally weighted regression (LOWESS); C++ port of W. S. Cleveland's
+// lowess.f (see the original documentation block below the code).
+//   x,y    - scatterplot coordinates; x must be sorted in increasing order
+//   f      - fraction of points used for each local fit (smoothing amount)
+//   nsteps - number of robustness (re-weighting) iterations
+//   delta  - points within delta of the last fitted x are interpolated
+//   ys     - output fitted values (one per input point)
+//   rw     - output robustness weights; res - output residuals y-ys
+void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, double delta, vector<double> &ys, vector<double> &rw, vector<double>&res){ //{{{
+   long n=(long)x.size();
+   bool ok=false;
+   long nleft,nright, i, j, iter, last, m1, m2, ns;
+   double cut, cmad, r, d1, d2, c1, c9, alpha, denom;
+   // Nothing to do for empty input or mismatched vector sizes.
+   if((n==0)||((long)y.size()!=n)) return;
+   ys.resize(n);
+   rw.resize(n);
+   res.resize(n);
+   if(n==1){
+      ys[0]=y[0];
+      return;
+   }
+   // ns - at least 2, at most n
+   ns = max(min((long)(f*n),n),(long)2);
+   for(iter=0;iter<nsteps+1; iter++){
+      // robustnes iterations
+      nleft = 0;
+      nright = ns-1;
+      // index of last estimated point
+      last = -1;
+      // index of current point
+      i=0;
+      do{
+         while(nright<n-1){
+            // move <nleft,nright> right, while radius decreases
+            d1 = x[i]-x[nleft];
+            d2 = x[nright+1] - x[i];
+            if(d1<=d2)break;
+            nleft++;
+            nright++;
+         }
+         // fit value at x[i]
+         lowest(x,y,x[i],ys[i],nleft,nright,res,iter>0,rw,ok);
+         // all weights were zero: fall back to copying the observation
+         if(!ok) ys[i]=y[i];
+         if(last<i-1){
+            // interpolate skipped points
+            if(last<0){
+               warning("Lowess: out of range.\n");
+            }
+            denom = x[i] - x[last];
+            for(j=last+1;j<i;j++){
+               alpha = (x[j]-x[last])/denom;
+               ys[j] = alpha * ys[i] + (1.0-alpha)*ys[last];
+            }
+         }
+         last = i;
+         // points within delta of the fitted x are interpolated, not fitted
+         cut = x[last]+delta;
+         for(i=last+1;i<n;i++){
+            if(x[i]>cut)break;
+            if(x[i]==x[last]){
+               // exact ties in x reuse the fitted value
+               ys[i]=ys[last];
+               last=i;
+            }
+         }
+         i=max(last+1,i-1);
+      }while(last<n-1);
+      for(i=0;i<n;i++)
+         res[i] = y[i]-ys[i];
+      // robustness weights are not needed after the final iteration
+      if(iter==nsteps)break ;
+      for(i=0;i<n;i++)
+         rw[i]=abs(res[i]);
+      sort(rw.begin(),rw.end());
+      // cmad = 6 * median absolute residual (average of the two middle
+      // order statistics); residuals beyond cmad get zero weight
+      m1 = n/2+1;
+      m2 = n-m1;
+      m1 --;
+      cmad = 3.0 *(rw[m1]+rw[m2]);
+      c9 = .999*cmad;
+      c1 = .001*cmad;
+      for(i=0;i<n;i++){
+         r = abs(res[i]);
+         if(r<=c1) rw[i]=1;       // near 0, avoid underflow
+         else if(r>c9) rw[i]=0;   // near 1, avoid underflow
+         else rw[i] = (1.0-(r/cmad)*(r/cmad))*(1.0-(r/cmad)*(r/cmad));
+      }
+   }
+}//}}}
+
+// Support routine for lowess(): computes the fitted value ys at position xs
+// by weighted linear regression over the points nleft..nright.  Tricube
+// weights in x-distance are used, optionally multiplied by the robustness
+// weights rw when userw is set.  Sets ok=false (ys left unchanged) when all
+// weights are zero; w is scratch storage for the weights.
+void lowest(const vector<double> &x, const vector<double> &y, double xs, double &ys, long nleft, long nright, vector<double> &w, bool userw,  vector<double> &rw, bool &ok){//{{{
+   long n = (long)x.size();
+   long nrt, j;
+   double a, b, c, h, r, h1, h9, range;
+   range = x[n-1]-x[0];
+   // h - distance from xs to the furthest point of the window
+   h = max(xs-x[nleft],x[nright]-xs);
+   h9 = 0.999*h;
+   h1 = 0.001*h;
+   // sum of weights
+   a = 0; 
+   for(j=nleft;j<n;j++){
+      // compute weights (pick up all ties on right)
+      w[j]=0.;
+      r = abs(x[j]-xs);
+      if(r<=h9){
+         // small enough for non-zero weight
+         if(r>h1) w[j] = (1.0-(r/h)*(r/h)*(r/h))*(1.0-(r/h)*(r/h)*(r/h))*(1.0-(r/h)*(r/h)*(r/h));
+         else w[j] = 1.;
+         if(userw) w[j] *= rw[j];
+         a += w[j];
+      }else if(x[j]>xs) break; // get out at first zero wt on right
+   }
+   nrt = j-1;
+   // rightmost pt (may be greater than nright because of ties)
+   if(a<=0.) ok = false;
+   else{
+      // weighted least squares
+      ok = true;
+      // normalize weights
+      for(j=nleft;j<=nrt;j++)
+         w[j] /= a;
+      if(h>0.){
+         // use linear fit
+         a = 0.;
+         for(j=nleft;j<=nrt;j++)
+            a += w[j]*x[j]; // weighted centre of values
+         b = xs-a;
+         c = 0;
+         for(j=nleft;j<=nrt;j++)
+            c += w[j]*(x[j]-a)*(x[j]-a);
+         if(sqrt(c)>0.001*range){
+            // points are spread enough to compute slope
+            b /= c;
+            for(j=nleft;j<=nrt;j++)
+               w[j] *= (1.0+b*(x[j]-a));
+         }
+      }
+      // fitted value is the weighted sum of the observations
+      ys = 0;
+      for(j=nleft;j<=nrt;j++)
+         ys += w[j]*y[j];
+   }
+}//}}}
+
+/* {{{ Documentation
+* wsc at research.bell-labs.com Mon Dec 30 16:55 EST 1985
+* W. S. Cleveland
+* Bell Laboratories
+* Murray Hill NJ 07974
+* 
+* outline of this file:
+*    lines 1-72   introduction
+*        73-177   documentation for lowess
+*       178-238   ratfor version of lowess
+*       239-301   documentation for lowest
+*       302-350   ratfor version of lowest
+*       351-end   test driver and fortran version of lowess and lowest
+* 
+*   a multivariate version is available by "send dloess from a"
+* 
+*              COMPUTER PROGRAMS FOR LOCALLY WEIGHTED REGRESSION
+* 
+*             This package consists  of  two  FORTRAN  programs  for
+*        smoothing    scatterplots   by   robust   locally   weighted
+*        regression, or lowess.   The  principal  routine  is  LOWESS
+*        which   computes   the  smoothed  values  using  the  method
+*        described in The Elements of Graphing Data, by William S.
+*        Cleveland    (Wadsworth,    555 Morego   Street,   Monterey,
+*        California 93940).
+* 
+*             LOWESS calls a support routine, LOWEST, the code for
+*        which is included. LOWESS also calls a routine  SORT,  which
+*        the user must provide.
+* 
+*             To reduce the computations, LOWESS  requires  that  the
+*        arrays  X  and  Y,  which  are  the  horizontal and vertical
+*        coordinates, respectively, of the scatterplot, be such  that
+*        X  is  sorted  from  smallest  to  largest.   The  user must
+*        therefore use another sort routine which will sort X  and  Y
+*        according  to X.
+*             To summarize the scatterplot, YS,  the  fitted  values,
+*        should  be  plotted  against X.   No  graphics  routines are
+*        available in the package and must be supplied by the user.
+* 
+*             The FORTRAN code for the routines LOWESS and LOWEST has
+*        been   generated   from   higher   level   RATFOR   programs
+*        (B. W. Kernighan, ``RATFOR:  A Preprocessor for  a  Rational
+*        Fortran,''  Software Practice and Experience, Vol. 5 (1975),
+*        which are also included.
+* 
+*             The following are data and output from LOWESS that  can
+*        be  used  to check your implementation of the routines.  The
+*        notation (10)v means 10 values of v.
+* 
+* 
+* 
+* 
+*        X values:
+*          1  2  3  4  5  (10)6  8  10  12  14  50
+* 
+*        Y values:
+*           18  2  15  6  10  4  16  11  7  3  14  17  20  12  9  13  1  8  5  19
+* 
+* 
+*        YS values with F = .25, NSTEPS = 0, DELTA = 0.0
+*         13.659  11.145  8.701  9.722  10.000  (10)11.300  13.000  6.440  5.596
+*           5.456  18.998
+* 
+*        YS values with F = .25, NSTEPS = 0 ,  DELTA = 3.0
+*          13.659  12.347  11.034  9.722  10.511  (10)11.300  13.000  6.440  5.596
+*            5.456  18.998
+* 
+*        YS values with F = .25, NSTEPS = 2, DELTA = 0.0
+*          14.811  12.115  8.984  9.676  10.000  (10)11.346  13.000  6.734  5.744
+*            5.415  18.998
+* 
+* 
+* 
+* 
+*                                   LOWESS
+* 
+* 
+* 
+*        Calling sequence
+* 
+*        CALL LOWESS(X,Y,N,F,NSTEPS,DELTA,YS,RW,RES)
+* 
+*        Purpose
+* 
+*        LOWESS computes the smooth of a scatterplot of Y  against  X
+*        using  robust  locally  weighted regression.  Fitted values,
+*        YS, are computed at each of the  values  of  the  horizontal
+*        axis in X.
+* 
+*        Argument description
+* 
+*              X = Input; abscissas of the points on the
+*                  scatterplot; the values in X must be ordered
+*                  from smallest to largest.
+*              Y = Input; ordinates of the points on the
+*                  scatterplot.
+*              N = Input; dimension of X,Y,YS,RW, and RES.
+*              F = Input; specifies the amount of smoothing; F is
+*                  the fraction of points used to compute each
+*                  fitted value; as F increases the smoothed values
+*                  become smoother; choosing F in the range .2 to
+*                  .8 usually results in a good fit; if you have no
+*                  idea which value to use, try F = .5.
+*         NSTEPS = Input; the number of iterations in the robust
+*                  fit; if NSTEPS = 0, the nonrobust fit is
+*                  returned; setting NSTEPS equal to 2 should serve
+*                  most purposes.
+*          DELTA = input; nonnegative parameter which may be used
+*                  to save computations; if N is less than 100, set
+*                  DELTA equal to 0.0; if N is greater than 100 you
+*                  should find out how DELTA works by reading the
+*                  additional instructions section.
+*             YS = Output; fitted values; YS(I) is the fitted value
+*                  at X(I); to summarize the scatterplot, YS(I)
+*                  should be plotted against X(I).
+*             RW = Output; robustness weights; RW(I) is the weight
+*                  given to the point (X(I),Y(I)); if NSTEPS = 0,
+*                  RW is not used.
+*            RES = Output; residuals; RES(I) = Y(I)-YS(I).
+* 
+* 
+*        Other programs called
+* 
+*               LOWEST
+*               SSORT
+* 
+*        Additional instructions
+* 
+*        DELTA can be used to save computations.   Very  roughly  the
+*        algorithm  is  this:   on the initial fit and on each of the
+*        NSTEPS iterations locally weighted regression fitted  values
+*        are computed at points in X which are spaced, roughly, DELTA
+*        apart; then the fitted values at the  remaining  points  are
+*        computed  using  linear  interpolation.   The  first locally
+*        weighted regression (l.w.r.) computation is carried  out  at
+*        X(1)  and  the  last  is  carried  out at X(N).  Suppose the
+*        l.w.r. computation is carried out at  X(I).   If  X(I+1)  is
+*        greater  than  or  equal  to  X(I)+DELTA,  the  next  l.w.r.
+*        computation is carried out at X(I+1).   If  X(I+1)  is  less
+*        than X(I)+DELTA, the next l.w.r.  computation is carried out
+*        at the largest X(J) which is greater than or equal  to  X(I)
+*        but  is not greater than X(I)+DELTA.  Then the fitted values
+*        for X(K) between X(I)  and  X(J),  if  there  are  any,  are
+*        computed  by  linear  interpolation  of the fitted values at
+*        X(I) and X(J).  If N is less than 100 then DELTA can be  set
+*        to  0.0  since  the  computation time will not be too great.
+*        For larger N it is typically not necessary to carry out  the
+*        l.w.r.  computation for all points, so that much computation
+*        time can be saved by taking DELTA to be  greater  than  0.0.
+*        If  DELTA =  Range  (X)/k  then,  if  the  values  in X were
+*        uniformly  scattered  over  the  range,  the   full   l.w.r.
+*        computation  would be carried out at approximately k points.
+*        Taking k to be 50 often works well.
+* 
+*        Method
+* 
+*        The fitted values are computed by using the nearest neighbor
+*        routine  and  robust locally weighted regression of degree 1
+*        with the tricube weight function.  A few additional features
+*        have  been  added.  Suppose r is FN truncated to an integer.
+*        Let  h  be  the  distance  to  the  r-th  nearest   neighbor
+*        from X(I).   All  points within h of X(I) are used.  Thus if
+*        the r-th nearest neighbor is exactly the  same  distance  as
+*        other  points,  more  than r points can possibly be used for
+*        the smooth at  X(I).   There  are  two  cases  where  robust
+*        locally  weighted regression of degree 0 is actually used at
+*        X(I).  One case occurs when  h  is  0.0.   The  second  case
+*        occurs  when  the  weighted  standard error of the X(I) with
+*        respect to the weights w(j) is  less  than  .001  times  the
+*        range  of the X(I), where w(j) is the weight assigned to the
+*        j-th point of X (the tricube  weight  times  the  robustness
+*        weight)  divided by the sum of all of the weights.  Finally,
+*        if the w(j) are all zero for the smooth at X(I), the  fitted
+*        value is taken to be Y(I).
+* 
+* 
+* 
+* 
+*  subroutine lowess(x,y,n,f,nsteps,delta,ys,rw,res)
+*  real x(n),y(n),ys(n),rw(n),res(n)
+*  logical ok
+*  if (n<2){ ys(1) = y(1); return }
+*  ns = max0(min0(ifix(f*float(n)),n),2)  # at least two, at most n points
+*  for(iter=1; iter<=nsteps+1; iter=iter+1){      # robustness iterations
+*         nleft = 1; nright = ns
+*         last = 0        # index of prev estimated point
+*         i = 1   # index of current point
+*         repeat{
+*                 while(nright<n){
+*  # move nleft, nright to right if radius decreases
+*                         d1 = x(i)-x(nleft)
+*                         d2 = x(nright+1)-x(i)
+*  # if d1<=d2 with x(nright+1)==x(nright), lowest fixes
+*                         if (d1<=d2) break
+*  # radius will not decrease by move right
+*                         nleft = nleft+1
+*                         nright = nright+1
+*                         }
+*                 call lowest(x,y,n,x(i),ys(i),nleft,nright,res,iter>1,rw,ok)
+*  # fitted value at x(i)
+*                 if (!ok) ys(i) = y(i)
+*  # all weights zero - copy over value (all rw==0)
+*                 if (last<i-1) { # skipped points -- interpolate
+*                         denom = x(i)-x(last)    # non-zero - proof?
+*                         for(j=last+1; j<i; j=j+1){
+*                                 alpha = (x(j)-x(last))/denom
+*                                 ys(j) = alpha*ys(i)+(1.0-alpha)*ys(last)
+*                                 }
+*                         }
+*                 last = i        # last point actually estimated
+*                 cut = x(last)+delta     # x coord of close points
+*                 for(i=last+1; i<=n; i=i+1){     # find close points
+*                         if (x(i)>cut) break     # i one beyond last pt within cut
+*                         if(x(i)==x(last)){      # exact match in x
+*                                 ys(i) = ys(last)
+*                                 last = i
+*                                 }
+*                         }
+*                 i=max0(last+1,i-1)
+*  # back 1 point so interpolation within delta, but always go forward
+*                 } until(last>=n)
+*         do i = 1,n      # residuals
+*                 res(i) = y(i)-ys(i)
+*         if (iter>nsteps) break  # compute robustness weights except last time
+*         do i = 1,n
+*                 rw(i) = abs(res(i))
+*         call sort(rw,n)
+*         m1 = 1+n/2; m2 = n-m1+1
+*         cmad = 3.0*(rw(m1)+rw(m2))      # 6 median abs resid
+*         c9 = .999*cmad; c1 = .001*cmad
+*         do i = 1,n {
+*                 r = abs(res(i))
+*                 if(r<=c1) rw(i)=1.      # near 0, avoid underflow
+*                 else if(r>c9) rw(i)=0.  # near 1, avoid underflow
+*                 else rw(i) = (1.0-(r/cmad)**2)**2
+*                 }
+*         }
+*  return
+*  end
+* 
+* 
+* 
+* 
+*                                   LOWEST
+* 
+* 
+* 
+*        Calling sequence
+* 
+*        CALL LOWEST(X,Y,N,XS,YS,NLEFT,NRIGHT,W,USERW,RW,OK)
+* 
+*        Purpose
+* 
+*        LOWEST is a support routine for LOWESS and  ordinarily  will
+*        not  be  called  by  the  user.   The  fitted  value, YS, is
+*        computed  at  the  value,  XS,  of  the   horizontal   axis.
+*        Robustness  weights,  RW,  can  be employed in computing the
+*        fit.
+* 
+*        Argument description
+* 
+* 
+*              X = Input; abscissas of the points on the
+*                  scatterplot; the values in X must be ordered
+*                  from smallest to largest.
+*              Y = Input; ordinates of the points on the
+*                  scatterplot.
+*              N = Input; dimension of X,Y,W, and RW.
+*             XS = Input; value of the horizontal axis at which the
+*                  smooth is computed.
+*             YS = Output; fitted value at XS.
+*          NLEFT = Input; index of the first point which should be
+*                  considered in computing the fitted value.
+*         NRIGHT = Input; index of the last point which should be
+*                  considered in computing the fitted value.
+*              W = Output; W(I) is the weight for Y(I) used in the
+*                  expression for YS, which is the sum from
+*                  I = NLEFT to NRIGHT of W(I)*Y(I); W(I) is
+*                  defined only at locations NLEFT to NRIGHT.
+*          USERW = Input; logical variable; if USERW is .TRUE., a
+*                  robust fit is carried out using the weights in
+*                  RW; if USERW is .FALSE., the values in RW are
+*                  not used.
+*             RW = Input; robustness weights.
+*             OK = Output; logical variable; if the weights for the
+*                  smooth are all 0.0, the fitted value, YS, is not
+*                  computed and OK is set equal to .FALSE.; if the
+*                  fitted value is computed OK is set equal to .TRUE.
+* 
+* 
+*        Method
+* 
+*        The smooth at XS is computed using (robust) locally weighted
+*        regression of degree 1.  The tricube weight function is used
+*        with h equal to the maximum of XS-X(NLEFT) and X(NRIGHT)-XS.
+*        Two  cases  where  the  program  reverts to locally weighted
+*        regression of degree 0 are described  in  the  documentation
+*        for LOWESS.
+* 
+* 
+* 
+* 
+*  subroutine lowest(x,y,n,xs,ys,nleft,nright,w,userw,rw,ok)
+*  real x(n),y(n),w(n),rw(n)
+*  logical userw,ok
+*  range = x(n)-x(1)
+*  h = amax1(xs-x(nleft),x(nright)-xs)
+*  h9 = .999*h
+*  h1 = .001*h
+*  a = 0.0        # sum of weights
+*  for(j=nleft; j<=n; j=j+1){     # compute weights (pick up all ties on right)
+*         w(j)=0.
+*         r = abs(x(j)-xs)
+*         if (r<=h9) {    # small enough for non-zero weight
+*                 if (r>h1) w(j) = (1.0-(r/h)**3)**3
+*                 else      w(j) = 1.
+*                 if (userw) w(j) = rw(j)*w(j)
+*                 a = a+w(j)
+*                 }
+*         else if(x(j)>xs)break   # get out at first zero wt on right
+*         }
+*  nrt=j-1        # rightmost pt (may be greater than nright because of ties)
+*  if (a<=0.0) ok = FALSE
+*  else { # weighted least squares
+*         ok = TRUE
+*         do j = nleft,nrt
+*                 w(j) = w(j)/a   # make sum of w(j) == 1
+*         if (h>0.) {     # use linear fit
+*                 a = 0.0
+*                 do j = nleft,nrt
+*                         a = a+w(j)*x(j) # weighted center of x values
+*                 b = xs-a
+*                 c = 0.0
+*                 do j = nleft,nrt
+*                         c = c+w(j)*(x(j)-a)**2
+*                 if(sqrt(c)>.001*range) {
+*  # points are spread out enough to compute slope
+*                         b = b/c
+*                         do j = nleft,nrt
+*                                 w(j) = w(j)*(1.0+b*(x(j)-a))
+*                         }
+*                 }
+*         ys = 0.0
+*         do j = nleft,nrt
+*                 ys = ys+w(j)*y(j)
+*         }
+*  return
+*  end
+* 
+}}}*/
diff --git a/lowess.h b/lowess.h
new file mode 100644
index 0000000..f20a518
--- /dev/null
+++ b/lowess.h
@@ -0,0 +1,30 @@
+/*
+ *  c++ implementation of Lowess weighted regression by 
+ *  Peter Glaus http://www.cs.man.ac.uk/~glausp/
+ *
+ *
+ *  Based on fortran code by Cleveland downloaded from:
+ *  http://netlib.org/go/lowess.f
+ *  original author:
+* wsc at research.bell-labs.com Mon Dec 30 16:55 EST 1985
+* W. S. Cleveland
+* Bell Laboratories
+* Murray Hill NJ 07974
+ *  
+ *  See original documentation in the .cpp file for details.
+ * 
+ */
+#ifndef LOWESS_H
+#define LOWESS_H
+
+#include<vector>
+
+using namespace std;
+
+void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, double delta, vector<double> &ys, vector<double> &rw, vector<double> &res);
+
+void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, vector<double> &ys);
+
+void lowest(const vector<double> &x, const vector<double> &y, double xs, double &ys, long nleft, long nright, vector<double> &w,bool userw,  vector<double> &rw, bool &ok);
+
+#endif
diff --git a/misc.cpp b/misc.cpp
new file mode 100644
index 0000000..256dd63
--- /dev/null
+++ b/misc.cpp
@@ -0,0 +1,240 @@
+#include <algorithm>
+#include <ctime>
+#include <cmath>
+
+#include "misc.h"
+
+#include "FileHeader.h"
+
+#include "common.h"
+
+namespace ns_math {
+double logAddExp(double a, double b){ //{{{
+   if(a>b){
+      return a+log1p(exp(b-a));
+   }else {
+      return b+log1p(exp(a-b));
+   }
+} //}}}
+double logSumExp(const vector<double> &vals, long st, long en){ //{{{
+   if(st<0)st = 0;
+   if((en == -1) || (en > (long)vals.size())) en = vals.size();
+   if(st >= en)return 0;
+   double sumE = 0, m = *max_element(vals.begin() + st,vals.begin() + en);
+   for(long i = st; i < en; i++)
+      sumE += exp(vals[i] - m);
+   return  m + log(sumE);
+} //}}}
+} // namespace ns_math
+
+namespace ns_expression {
+
+string getOutputType(const ArgumentParser &args, const string &defaultType){ //{{{
+   string type = ns_misc::toLower(args.getS("outputType"));
+   if((type!="theta") && (type!="rpkm") && (type!="counts") && (type!="tau")){
+      type = defaultType;
+      warning("Using output type %s.",type.c_str());
+   }
+   return type;
+} //}}}
+} // namespace ns_expression
+
+namespace ns_misc {
+long getSeed(const ArgumentParser &args){//{{{
+   long seed;
+   if(args.isSet("seed"))seed=args.getL("seed");
+   else seed = time(NULL);
+   if(args.verbose)message("seed: %ld\n",seed);
+   return seed;
+}//}}}
+bool openOutput(const ArgumentParser &args, ofstream *outF){//{{{
+   outF->open(args.getS("outFileName").c_str());
+   if(!outF->is_open()){
+      error("Main: Output file open failed.\n");
+      return false;
+   }
+   return true;
+}//}}}
+bool openOutput(const string &name, ofstream *outF) {//{{{
+   outF->open(name.c_str());
+   if(!outF->is_open()){
+      error("Main: File '%s' open failed.\n",name.c_str());
+      return false;
+   }
+   return true;
+}//}}}
+
+bool readConditions(const ArgumentParser &args, long *C, long *M, long *N, Conditions *cond){//{{{
+   if(! cond->init("NONE", args.args(), C, M, N)){
+      error("Main: Failed loading MCMC samples.\n");
+      return false;
+   }
+   if(args.isSet("normalization")){
+      if(! cond->setNorm(args.getTokenizedS2D("normalization"))){
+         error("Main: Applying normalization constants failed.\n");
+         return false;
+      }
+   }
+   if(!cond->logged() && args.verb()){
+      message("Samples are not logged. (will log for you)\n");
+      message("Using %lg as minimum instead of log(0).\n",LOG_ZERO);
+   }
+   if(args.verb())message("Files with samples loaded.\n");
+   return true;
+}//}}}
+
+void computeCI(double cf, vector<double> *difs, double *ciLow, double *ciHigh){//{{{
+   cf = (100 - cf) / 2.0;
+   double N = difs->size();
+   sort(difs->begin(),difs->end());
+   *ciLow = (*difs)[(long)(N/100.*cf)];
+   *ciHigh = (*difs)[(long)(N-N/100.*cf)];
+}//}}}
+
+string toLower(string str){//{{{
+   for(size_t i=0;i<str.size();i++)
+      if((str[i]>='A')&&(str[i]<='Z'))str[i]=str[i]-'A'+'a';
+   return str;
+}//}}}
+
+vector<string> tokenize(const string &input,const string &space){//{{{
+   vector<string> ret;
+   long pos=0,f=0,n=input.size();
+   while((pos<n)&&(f<n)&&(f>=0)){
+      f=input.find(space,pos);
+      if(f==pos)pos++;
+      else{
+         if((f<n)&&(f>=0)){
+            ret.push_back(input.substr(pos,f-pos));
+            pos=f+1;
+         }
+      }
+   }
+   if(pos<n)ret.push_back(input.substr(pos,n-pos));
+   return ret;
+} //}}}
+} // namespace ns_misc
+
+namespace ns_genes {
+bool getLog(const ArgumentParser &args){// {{{
+   if(args.flag("log")){
+      if(args.verb())message("Using logged values.\n");
+      return true;
+   }
+   if(args.verb())message("NOT using logged values.\n");
+   return false;
+}// }}}
+
+bool prepareInput(const ArgumentParser &args, TranscriptInfo *trInfo, PosteriorSamples *samples, long *M, long *N, long *G){// {{{
+   if(! trInfo->readInfo(args.getS("trInfoFileName"))) return false;
+   *G = trInfo->getG();
+   if((! samples->initSet(M,N,args.args()[0]))||(*M<=0)||(*N<=0)){
+      error("Main: Failed loading MCMC samples.\n");
+      return false;
+   }
+   if(*M!=trInfo->getM()){
+      error("Main: Number of transcripts in the info file and samples file are different: %ld vs %ld\n",trInfo->getM(),*M);
+      return false;
+   }
+   if(args.verb())messageF("Transcripts: %ld\n",*M);
+   return true;
+}// }}}
+
+bool updateGenes(const ArgumentParser &args, TranscriptInfo *trInfo, long *G){//{{{
+   if(!(args.isSet("trMapFile") || args.isSet("geneListFile")))return true;
+   if(args.isSet("trMapFile") && args.isSet("geneListFile")){
+      error("Main: Please provide only one of trMapFile and geneListFile, both serve the same function.\n");
+      return false;
+   }
+   bool isMap;
+   ifstream mapFile;
+   if(args.isSet("trMapFile")){
+      isMap = true;
+      mapFile.open(args.getS("trMapFile").c_str());
+   }else {
+      isMap = false;
+      mapFile.open(args.getS("geneListFile").c_str());
+   }
+   if(!mapFile.is_open()){
+      if(isMap){
+         error("Main: Failed reading file with transcript to gene mapping.\n");
+      }else{
+         error("Main: Failed reading file with gene names.\n");
+      }
+      return false;
+   }
+   map<string,string> trMap;
+   vector<string> geneList;
+   string trName,geName;
+   while(mapFile.good()){
+      while(mapFile.good() && (mapFile.peek()=='#'))
+         mapFile.ignore(100000000,'\n');
+      if(!mapFile.good()) break;
+      mapFile>>geName;
+      if(isMap){
+         mapFile>>trName;
+      }
+      if(!mapFile.fail()){
+         if(isMap){
+            trMap[trName]=geName;
+         }else{
+            geneList.push_back(geName);
+         }
+      }
+      mapFile.ignore(100000000,'\n');
+   }
+   mapFile.close();
+   bool succ;
+   if(isMap)succ = trInfo->updateGeneNames(trMap);
+   else succ = trInfo->updateGeneNames(geneList);
+   if(!succ){
+      error("Main: Filed setting gene information.\n");
+      return false;
+   }
+   *G = trInfo->getG();
+   return true;
+}//}}}
+
+bool checkGeneCount(long G, long M){//{{{
+   if((G != 1) && (G != M)) return true;
+   if(G==1){
+      error("Main: All transcripts share just one gene.\n");
+   }else{
+      error("Main: There are no transcripts sharing one gene.\n");
+   }
+   message("Please provide valid transcript to gene mapping (trMapFile or geneListFile).\n"
+           "   (trMap file should contain rows in format: <geneName> <transcriptName>.)\n"
+           "   (geneList file should contain rows with gene names, one per transcript.)\n");
+   return false;
+}//}}}
+} // namespace ns_genes
+
+namespace ns_params {
+bool readParams(const string &name, vector<paramT> *params, ofstream *outF){//{{{
+   long parN;
+   ifstream parFile(name.c_str());
+   FileHeader fh(&parFile);
+   if(!fh.paramsHeader(&parN, outF)){
+      error("Main: Problem loading parameters file %s\n",name.c_str());
+      return false;
+   }
+   // Vector of parameters: (mean expression, (alpha, beta) )
+   paramT param;
+   while(parFile.good()){
+      while((parFile.good())&&(parFile.peek()=='#')){
+         parFile.ignore(10000000,'\n');
+      }
+      parFile>>param.alpha>>param.beta>>param.expr;
+      if(parFile.good())
+         params->push_back(param);
+      parFile.ignore(10000000,'\n');
+   }
+   if((parN>0)&&(parN != (long)params->size())){
+      warning("Main: declared number of parameters does not match number of lines read (%ld %ld).\n", parN, (long)params->size());
+   }
+   fh.close();
+   sort(params->begin(),params->end());
+   return true;
+}//}}}
+
+} // namespace ns_params
diff --git a/misc.h b/misc.h
new file mode 100644
index 0000000..23ccb38
--- /dev/null
+++ b/misc.h
@@ -0,0 +1,84 @@
+#ifndef MISC_H
+#define MISC_H
+
+#include<fstream>
+
+#include "ArgumentParser.h"
+#include "PosteriorSamples.h"
+#include "TranscriptInfo.h"
+
+namespace ns_math {
+
+// For a=log(x), b=log(y); compute log(x+y).
+double logAddExp(double a, double b);
+
+// For vals_i = log(x_i); compute log(sum(x_i)) for st<=i<en.
+double logSumExp(const vector<double> &vals, long st = 0, long en = -1);
+
+}
+
+namespace ns_expression {
+
+// Return output type based on the command line argument (one of theta/rpkm/counts/tau).
+string getOutputType(const ArgumentParser &args, const string &defaultType = "rpkm");
+}
+
+namespace ns_misc {
+
+// Value to use instead of log(0).
+const double LOG_ZERO=-100;
+
+// Return seed; either using seed set in args, or by using time(NULL) as seed.
+long getSeed(const ArgumentParser &args);
+
+// Open output file based on standard argument --outFile=<outFileName>.
+bool openOutput(const ArgumentParser &args, ofstream *outF);
+// Open output file of a give name.
+bool openOutput(const string &name, ofstream *outF);
+
+// Reads and initializes files containing samples for each condition and each replicate.
+bool readConditions(const ArgumentParser &args, long *C, long *M, long *N, Conditions *cond);
+
+// Compute confidence intervals.
+void computeCI(double cf, vector<double> *difs, double *ciLow, double *ciHigh);
+
+// Convert string into lower case.
+string toLower(string str);
+
+// Tokenize string into vector of strings based on separator.
+vector<string> tokenize(const string &input,const string &space = " ");
+}
+
+namespace ns_genes {
+// Return true if -l/--log is set.
+bool getLog(const ArgumentParser &args);
+
+// Initializes samples reader, trInfo and sets M,N,G.
+// Return false if reading failed or number of transcripts does not match.
+bool prepareInput(const ArgumentParser &args, TranscriptInfo *trInfo, PosteriorSamples *samples, long *M, long *N, long *G);
+
+// Tries reading Transcript->Gene mapping from arguments provided (trMapFile or geneListFile)
+// and update gene info.
+bool updateGenes(const ArgumentParser &args, TranscriptInfo *trInfo, long *G);
+
+// Check whether gene count is reasonable (G!=1 && G!=M)
+// and write appropriate error messages
+bool checkGeneCount(long G, long M);
+} // namespace ns_genes
+
+namespace ns_params{
+
+struct paramT {//{{{
+   double expr, alpha, beta;
+   bool operator< (const paramT &p2) const{
+      return expr<p2.expr;
+   }
+};//}}}
+
+// Read hyperparameters from a file specified by file name.
+// If outF is not NULL, it copies header from input file to outF.
+// The vector is sorted by expression at the end.
+bool readParams(const string &name, vector<paramT> *params, ofstream *outF = NULL);
+
+}
+#endif
diff --git a/parameters1.txt b/parameters1.txt
new file mode 100644
index 0000000..55c6b25
--- /dev/null
+++ b/parameters1.txt
@@ -0,0 +1,32 @@
+# parameters:
+# if this parameters file is used ( -p parameters1.txt ) then these values override the command line arguments --MCMC_*
+
+# length of burnIn
+burnIn 1000
+
+# initial number of samples, doubles every time targetScaleReduction is not met, until it reaches sampleNmax
+samplesN 1000
+# max number of samples generated in one iteration
+# after generating samplesNmax samples, the program finishes even if some transcripts have not met the targetScaleReduction criteria
+samplesNmax 30000
+
+# number of samples actually recorded
+samplesSave 500
+
+# number of parallel chains
+chainsN 4
+
+# target scale reduction for the parameters
+# this applies only when option --scaleReduction is used
+# this parameter decides end of sampling
+# if you want to end simulation increase it, this file is read every time sampling finishes k-th iteration of (2^(k-1))*samplesN samples
+#targetScaleReduction 1.2
+
+
+# parameters for the prior distributions
+#dirAlpha 1
+#dirBeta 1
+#betaAlpha 10
+#betaBeta 2
+
+
diff --git a/parseAlignment.cpp b/parseAlignment.cpp
new file mode 100644
index 0000000..63e19c7
--- /dev/null
+++ b/parseAlignment.cpp
@@ -0,0 +1,612 @@
+// DECLARATIONS: {{{
+#include<cmath>
+#include<set>
+
+using namespace std;
+
+#include "ArgumentParser.h"
+#include "misc.h"
+#include "MyTimer.h"
+#include "ReadDistribution.h"
+#include "TranscriptExpression.h"
+#include "TranscriptInfo.h"
+#include "TranscriptSequence.h"
+
+#include "common.h"
+//}}}
+
+//#define DEBUG_AT(x) message(x) 
+#define DEBUG_AT(x)
+
+namespace ns_parseAlignment {
+class TagAlignment{//{{{
+   protected:
+      int_least32_t trId;
+//      bool strand; // true = forward; false = reverse
+      double prob,lowProb;
+   public:
+      TagAlignment(long t=0,double p = 0,double lp = 0){
+         trId=(int_least32_t)t;
+//         strand=s;
+         prob=p;
+         lowProb=lp;
+      }
+      long getTrId()const {return trId;}
+      double getProb()const {return prob;}
+      double getLowProb()const {return lowProb;}
+      void setProb(double p){prob=p;}
+}; //}}}
+
+// Check if next fragment is different.
+bool nextFragDiffers(const ns_rD::fragmentP curF, const ns_rD::fragmentP nextF, bool mateNamesDiffer);
+// String comparison allowing last cmpEPS bases different as long as length
+// is the same.
+long readNameCmp(const char *str1, const char *str2);
+// Read Fragment from SAM file.
+// Copies data from 'next' fragment into 'cur' fragment and reads new fragment information into 'next'.
+// Fragment is either both paired-ends or just single read.
+bool readNextFragment(samfile_t* samData, ns_rD::fragmentP &cur, ns_rD::fragmentP &next);
+
+// Determine input format base either on --format flag or on the file extension.
+// Sets format to bam/sam and returns true, or returns false if format is unknown.
+bool setInputFormat(const ArgumentParser &args, string *format);
+
+bool openSamFile(const string &name, const string &inFormat, samfile_t **samFile);
+
+bool initializeInfoFile(const ArgumentParser &args, samfile_t *samFile, TranscriptInfo **trInfo, long *M);
+} // namespace ns_parseAlignment
+
+extern "C" int parseAlignment(int *argc,char* argv[]){
+string programDescription =
+"Pre-computes probabilities of (observed) reads' alignments.\n\
+   [alignment file] should be in either SAM or BAM format.\n";
+   TranscriptInfo *trInfo=NULL;
+   TranscriptSequence *trSeq=NULL;
+   TranscriptExpression *trExp=NULL;
+   MyTimer timer;
+   timer.start();
+   timer.start(7);
+   long Ntotal = 0, Nmap = 0, M=0, i;
+   string inFormat;
+   samfile_t *samData=NULL;
+   ReadDistribution readD;
+   ns_rD::fragmentP curF = new ns_rD::fragmentT, nextF = new ns_rD::fragmentT, validAF = new ns_rD::fragmentT;
+   // This could be changed to either GNU's hash_set or C++11's unordered_set, once it's safe.
+   set<string> ignoredReads;
+   long ignoredMaxAlignments = 0, ignoredSingletons = 0;
+   // Intro: {{{
+   // Set options {{{
+   ArgumentParser args(programDescription,"[alignment file]",1);
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   args.addOptionS("f","format","format",0,"Input format: either SAM, BAM.");
+   args.addOptionS("t","trInfoFile","trInfoFileName",0,"File to save transcript information extracted from [BS]AM file and reference.");
+   //args.addOptionS("t","trInfoFile","trInfoFileName",0,"If transcript(reference sequence) information is contained within SAM file, program will write this information into <trInfoFile>, otherwise it will look for this information in <trInfoFile>.");
+   args.addOptionS("s","trSeqFile","trSeqFileName",1,"Transcript sequence in FASTA format --- for non-uniform read distribution estimation.");
+   args.addOptionS("","trSeqHeader","trSeqHeader",0,"Transcript sequence header format enables gene name extraction (standard/gencode).","standard");
+   args.addOptionS("e","expressionFile","expFileName",0,"Transcript relative expression estimates --- for better non-uniform read distribution estimation.");
+   args.addOptionL("N","readsN","readsN",0,"Total number of reads. This is not necessary if [SB]AM contains also reads with no valid alignments.");
+   args.addOptionS("","failed","failed",0,"File name where to save names of reads that failed to align.");
+   args.addOptionB("","uniform","uniform",0,"Use uniform read distribution.");
+   args.addOptionD("","lenMu","lenMu",0,"Set mean of log fragment length distribution. (l_frag ~ LogNormal(mu,sigma^2))");
+   args.addOptionD("","lenSigma","lenSigma",0,"Set sigma^2 (or variance) of log fragment length distribution. (l_frag ~ LogNormal(mu,sigma^2))");
+   args.addOptionS("","distributionFile","distributionFileName",0,"Name of file to which read-distribution should be saved.");
+   args.addOptionL("P","procN","procN",0,"Maximum number of threads to be used. This provides speedup mostly when using non-uniform read distribution model (i.e. no --uniform flag).",4);
+   args.addOptionB("V","veryVerbose","veryVerbose",0,"Very verbose output.");
+   args.addOptionL("","noiseMismatches","numNoiseMismatches",0,"Number of mismatches to be considered as noise.",ns_rD::LOW_PROB_MISSES);
+   args.addOptionL("l","limitA","maxAlignments",0,"Limit maximum number of alignments per read. (Reads with more alignments are skipped.)");
+   args.addOptionB("","unstranded","unstranded",0,"Paired read are not strand specific.");
+   args.addOptionB("","show1warning","show1warning",0,"Show first alignments that are considered wrong (TID unknown, TID mismatch, wrong strand).");
+   args.addOptionB("","excludeSingletons","excludeSingletons",0,"Exclude single mate alignments for paired-end reads.");
+   args.addOptionB("","mateNamesDiffer","mateNamesDiffer",0,"Mates from paired-end reads have different names.");
+   if(!args.parse(*argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+   readD.setProcN(args.getL("procN"));
+   if(args.flag("show1warning"))readD.showFirstWarnings();
+   // }}}
+   if(!ns_parseAlignment::setInputFormat(args, &inFormat))return 1;
+   if(!ns_parseAlignment::openSamFile(args.args()[0], inFormat, &samData))return 1;
+   if(!ns_parseAlignment::initializeInfoFile(args, samData, &trInfo, &M))return 1;
+   // Read expression and initialize transcript sequence {{{
+   if(args.verbose)message("Initializing fasta sequence reader.\n");
+   // Initialize fasta sequence reader.
+   trSeq = new TranscriptSequence();
+   if(args.getLowerS("trSeqHeader") == "gencode"){
+      trSeq->readSequence(args.getS("trSeqFileName"), GENCODE); 
+   }else{
+      trSeq->readSequence(args.getS("trSeqFileName"), STANDARD); 
+   }
+   // Check numbers for transcripts match.
+   if(trSeq->getM() != M){
+      error("Main: Number of transcripts in the alignment file and the sequence file are different: %ld vs %ld\n",M,trSeq->getM());
+      return 1;
+   }
+   // Check that length of each transcript matches.
+   for(i=0;i<M;i++){
+      if(trInfo->L(i) != (long)(trSeq->getTr(i).size())){
+         error("Main: Transcript info length and sequence length of transcript %ld DO NOT MATCH! (%ld %d)\n",i,trInfo->L(i),(int)(trSeq->getTr(i).size()));
+         return 1;
+      }
+   }
+   // If there were gene names in transcript sequence, assign them to transcript info.
+   if(trSeq->hasGeneNames() && (trSeq->getG()>1)){
+      if(trInfo->getG() == 1){
+         // If just one gene present, then assign gene names.
+         if(args.verbose)message("Found gene names in sequence file, updating transcript information.\n");
+         trInfo->updateGeneNames(trSeq->getGeneNames());
+      }else{
+         // If there is more than one gene name already, don't fix.
+         if(trInfo->getG() != trSeq->getG()){
+            warning("Main: Different number of genes detected in transcript information and sequence file (%ld %ld).\n   You might want to check your data.\n", trInfo->getG(), trSeq->getG());
+         }
+      }
+   }
+   // If format is GENCODE and transcript names were extracted, update.
+   if((args.getLowerS("trSeqHeader") == "gencode")&&(trSeq->hasTrNames())){
+      if(args.flag("veryVerbose"))message("Updating transcript names.\n");
+      if(!trInfo->updateTrNames(trSeq->getTrNames())){
+         if(args.flag("veryVerbose"))warning("Transcript names update failed.\n");
+      }
+   }
+   if(!args.flag("uniform")){
+      // Try loading expression file from previous estimation for non-uniform read model.
+      if(args.isSet("expFileName")){
+         if(args.verbose)message("Loading transcript initial expression data.\n");
+         trExp = new TranscriptExpression(args.getS("expFileName"), GUESS);
+         if(trExp->getM() != M){
+            error("Main: Number of transcripts in the alignment file and the expression file are different: %ld vs %ld\n",M,trExp->getM());
+            return 1;
+         }
+      }
+   }
+   //}}}
+   timer.split(0,'m');
+   //}}}
+
+   // Estimating probabilities {{{
+   bool analyzeReads = false;
+
+   if(args.isSet("lenMu") && args.isSet("lenSigma")){
+      readD.setLength(args.getD("lenMu"),args.getD("lenSigma"));
+   }else{
+      analyzeReads = true;
+   }
+   if(args.flag("uniform")){
+      if(args.verbose)message("Using uniform read distribution.\n");
+      readD.initUniform(M,trInfo,trSeq,args.flag("veryVerbose"));
+   }else{
+      if(args.verbose)message("Estimating non-uniform read distribution.\n");
+      readD.init(M,trInfo,trSeq,trExp,args.flag("unstranded"),args.flag("veryVerbose"));
+      if(args.flag("veryVerbose"))message(" ReadDistribution initialization done.\n");
+      analyzeReads = true;
+   }
+   if(args.isSet("numNoiseMismatches")){
+      readD.setLowProbMismatches(args.getL("numNoiseMismatches"));
+   }
+   // fill in "next" fragment:
+   // Counters for all, Good Alignments; and weird alignments
+   long observeN, pairedGA, firstGA, secondGA, singleGA, weirdGA, allGA, pairedBad;
+   bool storedValidA = false;
+   long RE_noEndInfo, RE_weirdPairdInfo, RE_nameMismatch;
+   long maxAlignments = 0;
+   if(args.isSet("maxAlignments") && (args.getL("maxAlignments")>0))
+      maxAlignments = args.getL("maxAlignments");
+   // start counting (and possibly estimating):
+   observeN = pairedGA = firstGA = secondGA = singleGA = weirdGA = pairedBad = 0;
+   RE_noEndInfo = RE_weirdPairdInfo = RE_nameMismatch = 0;
+   ns_parseAlignment::readNextFragment(samData, curF, nextF);
+   while(ns_parseAlignment::readNextFragment(samData,curF,nextF)){
+      R_INTERUPT;
+      if( !(curF->first->core.flag & BAM_FUNMAP) ){
+         // (at least) The first read was mapped.
+         if( curF->paired ) {
+            // Fragment's both reads are mapped as a pair.
+            // Check mates' names.
+            if((ns_parseAlignment::readNameCmp(bam1_qname(curF->first), bam1_qname(curF->second))==0) || 
+               (args.flag("mateNamesDiffer"))){
+               pairedGA++;
+            }else{
+               pairedBad++;
+               if(RE_nameMismatch == 0){
+                  warning("Paired read name mismatch: %s %s\n",bam1_qname(curF->first), bam1_qname(curF->second));
+               }
+               RE_nameMismatch++;
+               if(RE_nameMismatch>10)break;
+            }
+         }else {
+            if (curF->first->core.flag & BAM_FPAIRED) {
+               // Read was part of pair (meaning that the other is unmapped).
+               if (curF->first->core.flag & BAM_FREAD1) {
+                  firstGA++;
+               } else if (curF->first->core.flag & BAM_FREAD2) {
+                  secondGA++;
+               } else weirdGA ++;
+            } else {
+               // Read is single end, with valid alignment.
+               singleGA++;
+            }
+         }
+         // Unless pairedBad>0 the alignment is valid.
+         // If excludeSingletons is set, only use paired alignment and alignments of single-end reads.
+         if((!storedValidA) && 
+            (((!args.flag("excludeSingletons")) && (pairedBad == 0)) ||
+             (pairedBad + firstGA + secondGA + weirdGA == 0))){
+            validAF->copyFragment(curF);
+            storedValidA=true;
+         }
+      }
+      // Next fragment is different.
+      if(ns_parseAlignment::nextFragDiffers(curF, nextF, args.flag("mateNamesDiffer"))){
+         Ntotal++;
+         allGA = singleGA + pairedGA + firstGA +secondGA+ weirdGA;
+         if( allGA == 0 ){ // No good alignment.
+            // Just in case:
+            storedValidA=false;
+            pairedBad = 0;
+            continue;
+         }
+         Nmap ++;
+         if(weirdGA)RE_noEndInfo++;
+         if((singleGA>0) && (pairedGA>0)) RE_weirdPairdInfo++;
+         // If it's good uniquely aligned fragment/read, add it to the observation.
+         if(( allGA == 1) && analyzeReads && (pairedBad == 0) && storedValidA){
+            if(readD.observed(validAF))observeN++;
+         }else if(maxAlignments && (allGA>maxAlignments)) {
+            // This read will be ignored.
+            ignoredReads.insert(bam1_qname(curF->first));
+            ignoredMaxAlignments++;
+            Nmap --;
+         }else if(args.flag("excludeSingletons") && (pairedGA + singleGA == 0)){
+            // When excluding singletons only alignments of full pair or single-end read count.
+            ignoredReads.insert(bam1_qname(curF->first));
+            ignoredSingletons++;
+            Nmap --;
+         }
+         pairedGA = firstGA = secondGA = singleGA = weirdGA = pairedBad = 0;
+         storedValidA = false;
+      }
+   }
+   if(RE_nameMismatch>10){
+      error("Names of paired mates didn't match at least 10 times.\n"
+            "   Something is possibly wrong with your data or the reads have to be renamed.\n");
+      return 1;
+   }
+   message("Reads: all(Ntotal): %ld  mapped(Nmap): %ld\n",Ntotal,Nmap);
+   if(args.verbose)message("  %ld reads were used to estimate empirical distributions.\n",observeN);
+   if(ignoredMaxAlignments>0)message("  %ld reads are skipped due to having more than %ld alignments.\n",ignoredMaxAlignments, maxAlignments);
+   if(ignoredSingletons>0)message("  %ld reads skipped due to having just single mate alignments.\n",ignoredSingletons);
+   if(RE_noEndInfo)warning("  %ld reads that were paired, but do not have \"end\" information.\n  (is your alignment file valid?)", RE_noEndInfo);
+   if(RE_weirdPairdInfo)warning("  %ld reads that were reported as both paired and single end.\n  (is your alignment file valid?)", RE_weirdPairdInfo);
+   readD.writeWarnings();
+   if(args.flag("veryVerbose"))timer.split(0,'m');
+   // Normalize read distribution:
+   if(args.flag("veryVerbose"))message("Normalizing read distribution.\n");
+   readD.normalize();
+   if(args.isSet("distributionFileName")){
+      readD.logProfiles(args.getS("distributionFileName"));
+   }
+   timer.split(0,'m');
+   // }}}
+
+   // Writing probabilities: {{{
+   // Re-opening alignment file 
+   if(!ns_parseAlignment::openSamFile(args.args()[0], inFormat, &samData))return 1;
+   if(args.verbose)message("Writing alignment probabilities.\n");
+   double prob,probNoise,minProb;
+   prob = probNoise = 0;
+   set<string> failedReads;
+   vector<ns_parseAlignment::TagAlignment> alignments;
+   // Open and initialize output file {{{
+   ofstream outF(args.getS("outFileName").c_str());
+   if(!outF.is_open()){
+      error("Main: Unable to open output file.\n");
+      return 1;
+   }
+   outF<<"# Ntotal "<<Ntotal<<"\n# Nmap "<<Nmap<<"\n# M "<<M<<endl;
+   outF<<"# LOGFORMAT (probabilities saved on log scale.)\n# r_name num_alignments (tr_id prob )^*{num_alignments}"<<endl;
+   outF.precision(9);
+   outF<<scientific;
+   // }}}
+   
+   // start reading:
+   timer.start(1);
+   bool invalidAlignment = false;
+   long readC, pairedN, singleN, firstN, secondN, weirdN, invalidN, noN;
+   readC = pairedN = singleN = firstN = secondN = weirdN = invalidN = noN = 0;
+   RE_nameMismatch = 0 ;
+   // fill in "next" fragment:
+   ns_parseAlignment::readNextFragment(samData, curF, nextF);
+   while(ns_parseAlignment::readNextFragment(samData,curF,nextF)){
+      R_INTERUPT;
+      // Skip all alignments of this read.
+      if(ignoredReads.count(bam1_qname(curF->first))>0){
+         DEBUG_AT(" ignore\n");
+         // Read reads while the name is the same.
+         while(ns_parseAlignment::readNextFragment(samData,curF,nextF)){
+            DEBUG_AT(" ignore\n");
+            if(ns_parseAlignment::nextFragDiffers(curF, nextF, args.flag("mateNamesDiffer")))
+               break;
+         }
+         readC++;
+         if(args.verbose){ if(progressLog(readC,Ntotal,10,' '))timer.split(1,'m');}
+         continue;
+      }
+      if( !(curF->first->core.flag & BAM_FUNMAP) ){
+         DEBUG_AT("M");
+         // (at least) The first read was mapped.
+         // Check mates' names.
+         if(curF->paired && (ns_parseAlignment::readNameCmp(bam1_qname(curF->first), bam1_qname(curF->second))!=0) && (!args.flag("mateNamesDiffer"))){
+            if(RE_nameMismatch == 0){
+               warning("Paired read name mismatch: %s %s\n",bam1_qname(curF->first), bam1_qname(curF->second));
+            }
+            RE_nameMismatch++;
+            if(RE_nameMismatch>10)break;
+            invalidAlignment = true;
+         }else if((!args.flag("excludeSingletons")) || curF->paired || (! (curF->first->core.flag & BAM_FPAIRED))){
+            // We only calculate probabilities and add alignments if:
+            // (singletons are not excluded) OR (it is a proper paired alignment) OR (it is a single-end read)
+            if(readD.getP(curF, prob, probNoise)){
+               // We calculated valid probabilities for this alignment.   
+               // Add alignment:
+               alignments.push_back(ns_parseAlignment::TagAlignment(curF->first->core.tid+1, prob, probNoise));
+               // Update counters:
+               if( curF->paired ) {
+                  // Fragment's both reads are mapped as a pair.
+                  pairedN++;
+                  DEBUG_AT(" P\n");
+               }else {
+                  if (curF->first->core.flag & BAM_FPAIRED) {
+                     // Read was part of pair (meaning that the other is unmapped).
+                     if (curF->first->core.flag & BAM_FREAD1) {
+                        firstN++;
+                        DEBUG_AT(" 1\n");
+                     } else if (curF->first->core.flag & BAM_FREAD2) {
+                        secondN++;
+                        DEBUG_AT(" 2\n");
+                     } else {
+                        weirdN ++;
+                        DEBUG_AT(" W\n");
+                     }
+                  } else {
+                     // Read is single end, with valid alignment.
+                     singleN++;
+                     DEBUG_AT(" S\n");
+                  }
+               }
+            } else {
+               // Calculation of alignment probabilities failed.
+               invalidAlignment = true;
+            }
+         }
+      }else DEBUG_AT("UNMAP\n");
+      // next fragment has different name
+      if(ns_parseAlignment::nextFragDiffers(curF, nextF, args.flag("mateNamesDiffer"))){
+         DEBUG_AT("  last\n");
+         readC++;
+         if(args.verbose){ if(progressLog(readC,Ntotal,10,' '))timer.split(1,'m');}
+         if(!alignments.empty()){
+            outF<<bam1_qname(curF->first)<<" "<<alignments.size()+1;
+            minProb = 1;
+            for(i=0;i<(long)alignments.size();i++){
+               if(minProb>alignments[i].getLowProb())minProb = alignments[i].getLowProb();
+               outF<<" "<<alignments[i].getTrId()
+//                   <<" "<<getStrandC(alignments[i].getStrand())
+                   <<" "<<alignments[i].getProb();
+            }
+            outF<<" 0 "<<minProb<<endl;
+            alignments.clear();
+         }else{
+            // read has no valid alignments:
+            if(invalidAlignment){
+               // If there were invalid alignments, write a mock record in order to keep Nmap consistent.
+               invalidN++;
+               outF<<bam1_qname(curF->first)<<" 1 0 0"<<endl;
+            }else {
+               noN++;
+            }
+            if(args.isSet("failed")){
+               // Save failed reads.
+               failedReads.insert(bam1_qname(curF->first));
+               if(curF->paired)failedReads.insert(bam1_qname(curF->second));
+            }
+         }
+         invalidAlignment = false;
+      }
+   }
+   if(RE_nameMismatch>10){
+      error("Names of paired mates didn't match at least 10 times.\n"
+            "   Something is possibly wrong with your data or the reads have to be renamed.\n");
+      return 1;
+   }
+   outF.close();
+   timer.split(0,'m');
+   if(args.verbose){
+      message("Analyzed %ld reads:\n",readC);
+      if(ignoredMaxAlignments>0)message(" %ld ignored due to --limitA flag\n",ignoredMaxAlignments);
+      if(invalidN>0)message(" %ld had only invalid alignments (see warnings)\n",invalidN);
+      if(noN>0)message(" %ld had no alignments\n",noN);
+      message("The rest had %ld alignments:\n",pairedN+singleN+firstN+secondN+weirdN);
+      if(pairedN>0)message(" %ld paired alignments\n",pairedN);
+      if(firstN+secondN+weirdN>0)
+         message(" %ld half alignments (paired-end mates aligned independently)\n",firstN+secondN+weirdN);
+      if(singleN>0)message(" %ld single-read alignments\n",singleN);
+      //flushStdout();
+      messageFlush();
+   }else {
+      messageF("Alignments: %ld.\n",pairedN+singleN+firstN+secondN+weirdN);
+   }
+   readD.writeWarnings();
+   if(args.flag("veryVerbose")){
+      message("Number of weights cached: %ld\n",readD.getWeightNormCount());
+   }
+   // Deal with reads that failed to align {{{
+   if(args.isSet("failed")){
+      outF.open(args.getS("failed").c_str());
+      if(outF.is_open()){
+         for(set<string>::iterator setIt=failedReads.begin(); setIt!=failedReads.end();setIt++)
+            outF<<*setIt<<endl;
+         outF.close();
+      }
+   } //}}}
+   // Compute effective length and save transcript info {{{
+   if(args.isSet("trInfoFileName")){
+      if(args.verbose)messageF("Computing effective lengths.\n");
+      trInfo->setEffectiveLength(readD.getEffectiveLengths());
+      if(! trInfo->writeInfo(args.getS("trInfoFileName"))){
+         warning("Main: File %s probably already exists.\n"
+                 "   Will save new transcript info into %s-NEW.\n",(args.getS("trInfoFileName")).c_str(),(args.getS("trInfoFileName")).c_str());
+         if(! trInfo->writeInfo(args.getS("trInfoFileName")+"-NEW", true)){ // DO OVERWRITE
+            warning("Main: Writing into %s failed!.",(args.getS("trInfoFileName")+"-NEW").c_str());
+         }
+      }else {
+         if(args.verbose)message("Transcript information saved into %s.\n",(args.getS("trInfoFileName")).c_str());
+      }
+      if(args.verbose)timer.split(0,'m');
+   } //}}}
+   // Close, free and write failed reads if filename provided {{{
+   delete curF;
+   delete nextF;
+   delete validAF;
+   delete trInfo;
+   delete trSeq;
+   delete trExp;
+   samclose(samData);
+   // }}}
+   // }}}
+   if(args.verbose)message("DONE. ");
+   timer.split(7,'m');
+   return 0;
+}
+
#ifndef BIOC_BUILD
// Standalone command-line entry point: forwards argc/argv to parseAlignment().
// When BIOC_BUILD is defined no main() is compiled; presumably the wrapping
// (BioConductor) build calls parseAlignment() directly -- confirm against the
// build configuration.
int main(int argc,char* argv[]){
   return parseAlignment(&argc,argv);
}
#endif
+
+namespace ns_parseAlignment {
+
+bool nextFragDiffers(const ns_rD::fragmentP curF, const ns_rD::fragmentP nextF, bool mateNamesDiffer){//{{{
+   if(readNameCmp(bam1_qname(curF->first), bam1_qname(nextF->first))==0) return false;
+   if(nextF->paired && mateNamesDiffer && (readNameCmp(bam1_qname(curF->first), bam1_qname(nextF->second))==0)) return false;
+   return true;
+}//}}}
+
long readNameCmp(const char *str1, const char *str2){//{{{
   // Compare two read names like strcmp, but treat them as equal when they
   // differ only in the very last character and that character follows a
   // '/', ':' or '_' separator (e.g. mate names "read/1" vs "read/2").
   // Checking the first characters up front also guards the look-back below.
   if(*str1 != *str2) return *str1 - *str2;
   while(*str1 || *str2){
      if(*str1 != *str2){
         const bool atLastChar = *str1 && *str2 &&
                                 (*(str1+1) == '\0') && (*(str2+1) == '\0');
         const char before = *(str1-1); // safe: difference can't be at index 0
         if(atLastChar && ((before == '/') || (before == ':') || (before == '_')))
            return 0;
         return *str1 - *str2;
      }
      ++str1;
      ++str2;
   }
   return 0;
}//}}}
+
+bool readNextFragment(samfile_t* samData, ns_rD::fragmentP &cur, ns_rD::fragmentP &next){//{{{
+   static ns_rD::fragmentP tmpF = NULL;
+   bool currentOK = true;
+   // switch current to next:
+   tmpF = cur;
+   cur = next;
+   next = tmpF;
+   // check if current fragment is valid
+   if( !cur->first->data || ( *(cur->first->data) == '\0')){
+      // current fragment is invalid
+      currentOK = false;
+   }
+   // try reading next fragment:
+   if(samread(samData,next->first)<0){
+      // read failed: set next reads name to empty string
+      *(next->first->data) = '\0';
+      return currentOK;
+   }
+   // Read proper pairs OR pairs with both mates unmapped into one fragment.
+   if((next->first->core.flag & BAM_FPROPER_PAIR) ||
+      ((next->first->core.flag & BAM_FPAIRED) &&
+       (next->first->core.flag & BAM_FUNMAP) &&
+       (next->first->core.flag & BAM_FMUNMAP))){
+      next->paired = true;
+      // Try reading second mate.
+      if(samread(samData,next->second)<0) next->paired = false;
+   }else{
+      next->paired = false;
+   }
+   /* Note:
+    * Relying on BAM_FREAD2 as being the last read of template probably does not work.
+    */
+   return currentOK;
+}//}}}
+
+bool setInputFormat(const ArgumentParser &args, string *format){//{{{
+   if(args.isSet("format")){
+      *format = args.getLowerS("format");
+      if((*format =="sam")||(*format == "bam")){
+         return true;
+      }
+      warning("Unknown format '%s'.\n",format->c_str());
+   }
+   string fileName = args.args()[0];
+   string extension = fileName.substr(fileName.rfind(".")+1);
+   *format = ns_misc::toLower(extension);
+   if((*format =="sam")||(*format == "bam")){
+      if(args.verb())message("Assuming alignment file in '%s' format.\n",format->c_str());
+      return true;
+   }
+   message("Unknown extension '%s'.\n",extension.c_str());
+   error("Couldn't determine the type of input file, please use --format and check your input.\n");
+   return false;
+}//}}}
+
+bool openSamFile(const string &name, const string &inFormat, samfile_t **samFile){//{{{
+   if(*samFile != NULL)samclose(*samFile);
+   if(inFormat=="bam") *samFile = samopen(name.c_str(), "rb" , NULL);
+   else *samFile = samopen(name.c_str(), "r" , NULL);
+   if(*samFile == NULL){
+      error("Failed re-reading alignments.\n");
+      return false;
+   }
+   return true;
+}//}}}
+
// Obtain transcript information: sets *M to the number of transcripts and
// makes *trInfo point at a newly allocated TranscriptInfo (caller owns it).
// The data comes from the SAM/BAM header when present, otherwise from the
// file given via --trInfoFile. Returns false (with an error message) when
// neither source is usable.
bool initializeInfoFile(const ArgumentParser &args, samfile_t *samFile, TranscriptInfo **trInfo, long *M){//{{{
   if((samFile->header == NULL)||(samFile->header->n_targets == 0)){
      // No usable header: require the transcript-info file.
      if(! args.isSet("trInfoFileName")){
         error("Main: alignment file does not contain header, or the header is empty.\n"
               "  Please either include header in alignment file or provide transcript information file.\n"
               "  (option --trInfoFile, file should contain lines with <gene name> <transcript name> <transcript length>.\n");
         return false;
      }else{
         if(args.verb())message("Using %s for transcript information.\n",(args.getS("trInfoFileName")).c_str());
         // Load the info file; isOK() reports whether parsing succeeded.
         if((*trInfo = new TranscriptInfo(args.getS("trInfoFileName"))) && (*trInfo)->isOK()){
            *M=(*trInfo)->getM();
         }else {
            error("Main: Can't get transcript information.\n");
            return false;
         }
      }
   }else{
      // Build transcript info from the alignment header's target list.
      if(args.verbose)message("Using alignments' header for transcript information.\n");
      *M = samFile->header->n_targets;
      vector<string> trNames(*M);
      vector<long> trLengths(*M);
      for(long i=0;i<*M;i++){
         trNames[i] = samFile->header->target_name[i];
         trLengths[i] = samFile->header->target_len[i]; 
      }
      *trInfo = new TranscriptInfo();
      // Gene names are unknown here, hence the "none" placeholders.
      if(! (*trInfo)->setInfo(vector<string>(*M,"none"), trNames, trLengths)){
         error("TranscriptInfo not initialized.\n");
         return false;
      }
   }
   return true;
}//}}}
+
+} // namespace ns_parseAlignment
diff --git a/parseAlignment.py b/parseAlignment.py
new file mode 100755
index 0000000..13ecc18
--- /dev/null
+++ b/parseAlignment.py
@@ -0,0 +1,482 @@
+#!/usr/bin/python
+# Initialization {{{
+import sys
+import numpy as np
def normpdf(x,m,s):
   # Gaussian probability density with mean m and std s;
   # 2.5066282746310002 == sqrt(2*pi).
   z = (x - m) / s
   return np.exp(-0.5 * z * z) / (s * 2.5066282746310002)
+import os, time # needed for this:
+time_str = time.strftime("%b %e %Y %H:%M:%S", time.gmtime(os.lstat(sys.argv[0]).st_mtime));
+print "###",os.path.basename(sys.argv[0]),"build:",time_str;
+# {{{ parse arguments and set filenames
+from optparse import OptionParser
+parser = OptionParser(usage="%prog [options]\n -a -t are necessary\n -e is adviced")
+parser.add_option("-T", "--transcriptPrefix", dest="tPref", help="Prefix of transcript names within MAP file (e.g. hg19_ensGene_ for ensembl genes from UCSC)", type="string")
+parser.add_option("-p", "--prefix", dest="pref", help="Experiment prefix, use same prefix for all files (.map, .tr, .prob)", type="string")
+parser.add_option("-a", "--alignmentFile", dest="aFile", help="Alignments file name", type="string")
+parser.add_option("-A", "--alignmentFileType", dest="aType", default="bowtie", help="Alignments file type", type="string")
+parser.add_option("-t", "--transcriptFile", dest="tFile", help="File with with list of transcripts (second column) and their lengths (third column, used later).", type="string")
+parser.add_option("-o", "--out", dest="oFile", help="Output name (should end with .prob).", type="string")
+parser.add_option("-N", "--totalN", dest = "totalN", help="Total number of reads. If <name>.map.bowtieLog does not exist this number has to be provided", type="int")
+parser.add_option("-i", "--inputType", dest = "inputType", help="Input file type determines the assignemnt of probability for each read (fastq, fastq33, fasta)", default="fastq");
+parser.add_option("-v", "--verbose", default=False, dest="verbose",  action="store_true", help="Verbose output")
+parser.add_option("--vv", default=False, dest="veryVerbose",  action="store_true", help="Very verbose output")
+parser.add_option("--paired", default=False, dest="paired", action="store_true", help="Flag fo paired alignemnts")
+parser.add_option("--IamSure", default=False, dest="amSure",  action="store_true", help="I am sure I want to use this.")
+
+
+(options, args) = parser.parse_args()
+
+if not options.amSure:
+   sys.exit("Please use new implementation of parsing algorithm \"parseAlignment\". If you really want to use this program use the option --IamSure.");
+
+
+if options.tPref !=None:
+   prefixL = len(options.tPref);
+else:
+   prefixL = 0;
+
+if options.pref :
+   aFileName=options.pref+".map"
+   oFileName=options.pref+".prob"
+   tFileName=options.pref+".tr"
+else:
+   if not options.aFile:
+      sys.exit("Need alignemnt file name.");
+   if not options.oFile:
+      sys.exit("Need output file name.");
+   if not options.tFile:
+      sys.exit("Need transcript file name.");
+if options.aFile:
+   aFileName=options.aFile
+if options.oFile:
+   oFileName=options.oFile
+if options.tFile:
+   tFileName=options.tFile;
+#}}}
+#{{{ get total number of reads, possibly from <file>.map.bowtieLog
+Ntotal = 0
+if options.totalN :
+   Ntotal = options.totalN;
+else:
+   try:
+      bLog = open(aFileName+".bowtieLog");
+      for line in bLog:
+         if line.find("# reads processed:")>-1:
+            Ntotal = int( line[line.find("# reads processed:")+18:].split()[0] ); 
+            # in other words: take the first word after "reads processed:" and convert it to Ntotal
+            break;
+      bLog.close();
+      if Ntotal <= 0: 
+         sys.exit("File read, but Ntotal was "+str(Ntotal));
+   except:
+      sys.exit( "Was not able to read file "+aFileName+".bowtieLog . Please provide number of reads (-N atribute) or the log file.")
+#}}}
def nuc2i(str):#{{{
   # Map a single nucleotide character (case-insensitive) to an index:
   # A->0, C->1, G->2, T->3, anything else (e.g. N) -> 4.
   return {"a": 0, "c": 1, "g": 2, "t": 3}.get(str.lower(), 4)
#}}}
def verbose(str):#{{{
   # Print str only when -v/--verbose was given (Python 2 print statement).
   if options.verbose:
      print str;
#}}}
+verbose("Using files:\n   "+aFileName+" for reading alignments\n   "+oFileName+" for writing probabilities\n   "+tFileName+" for writing transcript info");
+# {{{ reading transcript info
+try:
+   tFile = open(tFileName,"r")
+except:
+   sys.exit("Unable to open transcript file: "+tFileName+" .");
+
+trMap=dict()
+i=0;
+for line in tFile:
+   if line[0] == '#': continue;
+   trMap[line.split()[1]]=i+1;
+   #trMap[line.split()[1][prefixL:]]=i+1;
+   i+=1;
+trN=i;
+tFile.close();
+#}}}
+# {{{ open output file
+try:
+   oFile = open(oFileName,"w");
+except:
+   sys.exit("Unable to open output file: "+oFileName+" .");
+#}}}
+#{{{ open alignment file and check number of columns
+if options.aType != "bowtie":
+   sys.exit("Unrecognized alignment type.");
+try:
+   aFile = open(aFileName,"r")
+except:
+   sys.exit("Unable to open alignments file: "+aFileName+" .");
+
+alignment=aFile.readline().rstrip().split("\t");
+columnN=len(alignment)+1; # expect no mismatch info
+try:
+   x = int(alignment[columnN-2]); # this works if last column is NOT mismatch info
+except:
+   columnN -= 1; # otherwise decrease number of columns
+colS = columnN - 8; # if 8 columns, no shift necessary 
+verbose("columns: "+str(columnN));
+aFile.seek(0);
+#}}}
+# }}}
+
+if options.inputType=="fasta": #{{{
+   minReadLength=25;
+   pseudoCount = 1.0;
+   nucProb = [[[pseudoCount for i in range(5)] for k in range(5)] for j in range(minReadLength)];
+   noiseProb = [pseudoCount for i in range(5)];
+
+   verbose("Estimating mismatch probability.");  # {{{
+   readId=""
+   mismatch=""
+   hadMismatches=True;
+   readN = 0;
+   verbose("Use all reads, not only unique.");
+   for line in aFile:
+      alignment=line.rstrip().split("\t");
+
+      readSeq=alignment[4+colS]
+      if alignment[1+colS]=="-":
+         readSeq = readSeq[::-1];
+      
+      if alignment[0] != readId or readSeq != seq:
+         readId=alignment[0];
+         readN+=1;
+         if not hadMismatches:
+            while len(seq) > len(nucProb):
+                  nucProb.append([[pseudoCount for i in range(5)] for k in range(5)]);
+            for i in range(len(seq)):
+               nuc1 = nuc2i(seq[i]);
+               nucProb[i][nuc1][nuc1]+=1;
+
+         hadMismatches=False;
+         seq = readSeq;
+         mismatch=""
+         for nuc in seq:
+            noiseProb[nuc2i(nuc)]+=1
+
+      if len(alignment)==columnN:
+         if alignment[columnN-1] != mismatch:
+            while len(seq) > len(nucProb):
+                  nucProb.append([[pseudoCount for i in range(5)] for k in range(5)]);
+            for i in range(len(seq)):
+               nuc1 = nuc2i(seq[i]);
+               nucProb[i][nuc1][nuc1]+=1;
+            hadMismatches = True;
+
+            mismatch=alignment[columnN-1]
+            mismatchArray = mismatch.split(",");
+            for mis in mismatchArray:
+               pos = int( mis.split(":")[0] );
+               nuc1 = nuc2i( mis.split(":")[1].split(">")[0] );
+               nuc2 = nuc2i( mis.split(":")[1].split(">")[1] );
+   #            while pos >= len(nucProb):
+   #               nucProb.append([[pseudoCount for i in range(5)] for k in range(5)]);
+               nucProb[pos][ nuc2 ][ nuc2 ]-=1;
+               if nucProb[pos][nuc2][nuc2]<1 : print pos,nuc2,seq,mismatch;
+               nucProb[pos][ nuc1 ][ nuc2 ]+=1;
+   # }}}
+   """verbose("Using only unique reads");#{{{ 
+for line in aFile:
+   alignment=line.split();
+   if alignment[4] != seq:
+      seq=alignment[4]
+
+      if mismatch != "":
+         mismatchArray = mismatch.split(",");
+         for mis in mismatchArray:
+            pos = int( mis.split(":")[0] );
+            nuc1 = mis.split(":")[1].split(">")[0];
+            nuc2 = mis.split(":")[1].split(">")[1];
+            while pos <= len(nucProb):
+               nusProb.append([[pseudoCount for i in range(5)] for k in range(5)]);
+            nucProb[pos][ nuc2i(nuc1) ][ nuc2i(nuc2) ]+=1;
+      if len(alignment>7):
+         mismatch=alignment[7];
+      
+      for nuc in seq:
+         noiseProb[nuc2i(nuc)]+=1
+   else:
+      mismatch=""
+#}}}"""
+   verbose("Estimating probability of noise from aligned reads.") #{{{
+   total=sum(noiseProb);
+   for i in range(5):
+      noiseProb[i] /= total;
+
+   verbose("Estimating nucleotide mismatch matrix.");
+   for i in range(len(nucProb)):
+      for j in range(5):
+         total = sum( nucProb[i][j] );
+         for k in range(5):
+            nucProb[i][j][k] /= total;
+
+   if options.veryVerbose:
+      print "Noise probabilities: ";
+      print "   ",;
+      print noiseProb;
+      print "Nucleotide mismatch matrix:";
+      for i in range(len(nucProb)):
+         print "Position ",i,":\n   ",;
+         print nucProb[i];
+   #}}}
+   verbose("Writing alignment probabilities"); # {{{
+   aFile.seek(0);
+
+   alignment=aFile.readline().rstrip().split("\t");
+   readId=alignment[0];
+   if alignment[1+colS] == "+":
+      seq=alignment[4+colS];
+   else:
+      seq=alignment[4+colS][::-1];
+   prob = 1.0;
+   for nuc in seq:
+      prob *= noiseProb[nuc2i(nuc)];
+   alignments=[(0,alignment[1+colS],prob)];
+
+   aFile.seek(0);
+   alN = 0;
+   oFile.write("# Ntotal "+str(Ntotal)+"\n");
+   oFile.write("# Nmap "+str(readN)+"\n");
+
+   for line in aFile:
+      alignment=line.rstrip().split("\t");
+      alN+=1;
+      
+      readSeq=alignment[4+colS]
+      if alignment[1+colS]=="-":
+         readSeq = readSeq[::-1];
+      
+      # write old and init new reads
+      if readId!=alignment[0] or readSeq!=seq:
+         readId = readId.replace(" ","_");
+         oFile.write(readId+" "+str(len(alignments))+" alignments:");
+         for align in alignments:
+            oFile.write(" " + str(align[0]) + " " + align[1] + " " + str(align[2]));
+
+         oFile.write("\n");
+
+         readId=alignment[0];
+         seq = readSeq;
+         del alignments[:]
+         prob = 1.0;
+         for nuc in seq:
+            prob *= noiseProb[nuc2i(nuc)];
+         alignments.append((0,alignment[1+colS],prob));
+
+      # set transcript id
+      if alignment[2+colS][prefixL:] in trMap:
+         trans = trMap[ alignment[2+colS][prefixL:] ];
+      else:
+         trans = 0;
+         print "Transcript '"+alignment[2+colS]+"' or '"+alignment[2+colS][prefixL:]+"' was not found in the transcript file.";
+         #print alignment;
+      # calculate probabilities
+      prob=1.0;
+      for i in range(len(seq)):
+         nuc1 = nuc2i(seq[i]);
+         prob *= nucProb[i][nuc1][nuc1];
+      
+      if len(alignment)==columnN:
+            mismatch=alignment[columnN-1]
+            mismatchArray = mismatch.split(",");
+            for mis in mismatchArray:
+               pos = int( mis.split(":")[0] );
+               nuc1 = nuc2i( mis.split(":")[1].split(">")[0] );
+               nuc2 = nuc2i( mis.split(":")[1].split(">")[1] );
+               prob /= nucProb[pos][ nuc2 ][ nuc2 ];
+               prob *= nucProb[pos][ nuc1 ][ nuc2 ];
+      # add new alignment to list
+      alignments.append( (trans, alignment[1+colS], prob) );
+   #   if len(alignments)>2 and alignments[len(alignments)-1][2]!=alignments[len(alignments)-2][2]:
+   #      print readId;
+
+      
+   readId = readId.replace(" ","_");
+   oFile.write(readId+" "+str(len(alignments))+" alignments:");
+   for align in alignments:
+      oFile.write(" " + str(align[0]) + " " + str(align[1]) + " " + str(align[2]));
+   oFile.write("\n");
+   # }}}
+# end if options.inputType=="fasta" }}}
+else:
+   # {{{ qTOp functions
+   if options.inputType=="fastq": Qshift=64;
+   if options.inputType=="fastq33": Qshift=33;
+   phredWarning = False;
+   def qTOp(Q):
+      phredS = float(ord(Q)-Qshift);
+      if phredS<0:
+         if not phredWarning:
+            print "WARNING: Phred score too low (",int(phredS),") perhpas use --inputType fastq33.";
+            phredWarning=True;
+      elif phredS>65:
+         if not phredWarning:
+            print "NOTE: Phred score unnaturally high (",int(phredS),") check your input type and perhaps set --inputType fastq.";
+            phredWarning=True;
+      return 1-10**( phredS / -10);
+   def qTOpInvert(Q):
+      p = 1-10**(float(ord(Q)-Qshift) / -10);
+      if p==0: return 1;
+      return (1-p)/p;
+   #}}}
+   # {{{ counting reads
+   readN = 0
+   rId = "";
+   seq = "";
+   phread = "";
+   aFile.seek(0);
+   frags=[]
+   while True:
+      line = aFile.readline();
+      if line == "": break; # empty line means end of file
+      if options.paired: 
+         line2=aFile.readline();
+      
+      alignment=line.rstrip().split("\t");
+      readId=alignment[0];
+      readSeq=alignment[4+colS]
+      readPhread=alignment[5+colS];
+      
+      if readId != rId or readSeq != seq or readPhread != phread:
+         readN+=1;
+         rId=readId;
+         seq=readSeq;
+         phread=readPhread;
+         if options.paired:
+            frags.append( int(line2.rstrip().split("\t")[3+colS]) - int(alignment[3+colS]) );
+   if options.paired:
+      fragMu = np.mean(frags)
+      fragStD = np.std(frags)
+   # }}}
+   verbose("Writing alignment probabilities");
+   aFile.seek(0);
+   #{{{ read first read identificators
+   alignment=aFile.readline().rstrip().split("\t");
+   readId=alignment[0];
+   if alignment[1+colS] == "+":
+      seq=alignment[4+colS];
+      phread=alignment[5+colS]
+   else:
+      seq=alignment[4+colS][::-1];
+      phread=alignment[5+colS][::-1]
+   prob=1.0;
+   for Q in phread:
+      prob *= qTOp(Q);
+
+   if options.paired: #secon pair
+      align2 = aFile.readline().rstrip().split("\t");
+      fragL = int( align2[3+colS]) - int(alignment[3+colS]);
+      prob *= normpdf(fragL,fragMu,fragStD);
+      if align2[1+colS] == "+":
+         phread2=align2[5+colS]
+      else:
+         phread2=align2[5+colS][::-1]
+      for Q in phread2:
+         prob *= qTOp(Q);
+
+   alignments=[]
+   aFile.seek(0);
+   #}}}
+   alN = 0;
+   oFile.write("# Ntotal "+str(Ntotal)+"\n");
+   oFile.write("# Nmap "+str(readN)+"\n");
+
+   while True:
+      line=aFile.readline();
+      if line == "": break; # empty line means end of file
+      alignment=line.rstrip().split("\t");
+
+      alN+=1;
+      
+      readSeq=alignment[4+colS]
+      readPhread=alignment[5+colS]
+      if alignment[1+colS]=="-":
+         readPhread = readPhread[::-1]
+         readSeq = readSeq[::-1];
+      if options.paired:
+         align2 = aFile.readline().rstrip().split("\t")
+         r2Phread = align2[5+colS]
+         if align2[1+colS]=="-":
+            r2Phread = r2Phread[::-1];
+      else: r2Phread = "";
+      
+      # write old and init new reads
+      if readId!=alignment[0] or readSeq!=seq or readPhread!=phread:
+         readId = readId.replace(" ","_");
+         oFile.write(readId+" "+str(len(alignments)+1)+" alignments:");
+         minProb = 1;
+         for align in alignments:
+            if minProb > align[2]: minProb=align[2];
+            oFile.write(" " + str(align[0]) + " " + align[1] + " " + str(align[2]));
+         oFile.write(" 0 + " + str(minProb*qTOpInvert(phread[0])*qTOpInvert(phread[1])*qTOpInvert(phread[2])));
+         # add noise alignment with 3 extra mismatches on first bases
+         oFile.write("\n");
+
+         readId=alignment[0];
+         seq = readSeq;
+         phread=readPhread;
+         del alignments[:]
+         prob=1.0;
+         for Q in phread:
+            prob *= qTOp(Q);
+         if options.paired:
+            fragL = int(align2[3+colS])-int(alignment[3+colS]);
+            prob *= normpdf(fragL, fragMu, fragStD);
+            phread2=r2Phread;
+            for Q in phread2:
+               prob *= qTOp(Q);
+      # set transcript id
+      if alignment[2+colS][prefixL:] in trMap:
+         trans = trMap[ alignment[2+colS][prefixL:] ];
+      else:
+         trans = 0;
+         print "Transcript '"+alignment[2+colS]+"' or '"+alignment[2+colS][prefixL:]+"' was not found in the transcript file.";
+         #print alignment;
+      # calculate probabilities
+      probLoc = prob;
+      if len(alignment)==columnN:
+            mismatch=alignment[columnN-1]
+            mismatchArray = mismatch.split(",");
+            for mis in mismatchArray:
+               try:
+                  pos = int( mis.split(":")[0] );
+               except:
+                  pos=0;
+                  print 'X',mis,'X',alignment;
+               probLoc = probLoc * qTOpInvert(phread[pos]);
+      if options.paired and len(align2)==columnN:
+            mismatch=align2[columnN-1]
+            mismatchArray = mismatch.split(",");
+            for mis in mismatchArray:
+               try:
+                  pos = int( mis.split(":")[0] );
+               except:
+                  pos=0;
+                  print mis
+               probLoc = probLoc * qTOpInvert(phread2[pos]);
+
+      # add new alignment to list
+      alignments.append( (trans, alignment[1+colS], probLoc) );
+   #   if len(alignments)>2 and alignments[len(alignments)-1][2]!=alignments[len(alignments)-2][2]:
+   #      print readId;
+
+   readId = readId.replace(" ","_");
+   oFile.write(readId+" "+str(len(alignments)+1)+" alignments:");
+   minProb = 1;
+   for align in alignments:
+      if minProb > align[2]: minProb=align[2];
+      oFile.write(" " + str(align[0]) + " " + str(align[1]) + " " + str(align[2]));
+   oFile.write(" 0 + " + str(minProb*qTOpInvert(phread[0])*qTOpInvert(phread[1])*qTOpInvert(phread[2])));
+   # add noise alignment with 1 extra mismatch on first base
+   oFile.write("\n");
+
+
+print "Processed:\n  ",alN,"alignments + (",readN,"noise alignments)\n  ",readN,"reads\n  ",trN,"transcripts\nTotal reads: ",Ntotal,"\n";
+aFile.close();
+oFile.close();
diff --git a/releaseDo.sh b/releaseDo.sh
new file mode 100755
index 0000000..f670359
--- /dev/null
+++ b/releaseDo.sh
@@ -0,0 +1,52 @@
#!/bin/bash

# Script that automates creating a new BitSeq release.
# Copies relevant files listed in releaseList, uses _release_Makefile as new Makefile
# (make sure it's correct) and copies directories asa103, boost and samtools.

if [ $# -ne 1 ]
then
   echo "releaseDo.sh [dirName]"
   exit 1
fi

DIR=$1

if [ -d "$DIR" ]
then
   echo "Directory $DIR already exists!";
   exit 1
fi

mkdir "$DIR"

# Cleanup:
make clean-all

# Export (svn checkout) or copy (git checkout) the bundled dependencies.
if [[ -d .svn ]]
then
   svn export asa103 "$DIR/asa103"
   svn export boost "$DIR/boost"
   svn export samtools "$DIR/samtools"
else
   echo "Copying asa103, boost and samtools to '$DIR'."
   cp -rv asa103 boost samtools "$DIR"
fi

cp -v _release_Makefile "$DIR/Makefile"

# releaseList holds one file name per line; word splitting here is intentional.
cp -v $( cat releaseList ) "$DIR"

# Record the release date and revision in the shipped README.
echo "==================" >> "$DIR/README"
date >> "$DIR/README"
if [[ -d .svn ]]
then
   svn info | grep -e "^Revision:" >> "$DIR/README"
else
   git log -1 | grep "commit" >> "$DIR/README"
fi

echo "REMINDERs:"
echo "File Makefile contains current version of BitSeq, please update if haven't done already."
diff --git a/releaseList b/releaseList
new file mode 100644
index 0000000..328fa0f
--- /dev/null
+++ b/releaseList
@@ -0,0 +1,55 @@
+ArgumentParser.cpp
+ArgumentParser.h
+CollapsedSampler.cpp
+CollapsedSampler.h
+common.cpp
+common.h
+convertSamples.cpp
+estimateDE.cpp
+estimateExpression.cpp
+estimateHyperPar.cpp
+estimateVBExpression.cpp
+extractSamples.cpp
+FileHeader.cpp
+FileHeader.h
+getFoldChange.cpp
+getGeneExpression.cpp
+getPPLR.cpp
+getVariance.cpp
+getWithinGeneExpression.cpp
+GibbsParameters.cpp
+GibbsParameters.h
+GibbsSampler.cpp
+GibbsSampler.h
+lowess.cpp
+lowess.h
+misc.cpp
+misc.h
+MyTimer.cpp
+MyTimer.h
+parseAlignment.cpp
+PosteriorSamples.cpp
+PosteriorSamples.h
+ReadDistribution.cpp
+ReadDistribution.h
+Sampler.cpp
+Sampler.h
+SimpleSparse.cpp
+SimpleSparse.h
+TagAlignments.cpp
+TagAlignments.h
+TranscriptExpression.cpp
+TranscriptExpression.h
+TranscriptInfo.cpp
+TranscriptInfo.h
+TranscriptSequence.cpp
+TranscriptSequence.h
+transposeFiles.cpp
+transposeFiles.h
+transposeLargeFile.cpp
+VariationalBayes.cpp
+VariationalBayes.h
+extractTranscriptInfo.py
+getCounts.py
+parameters1.txt
+README
diff --git a/tagAlignment.h b/tagAlignment.h
new file mode 100644
index 0000000..6fcf143
--- /dev/null
+++ b/tagAlignment.h
@@ -0,0 +1,37 @@
+#ifndef TAGALIGNMENT_H
+#define TAGALIGNMENT_H
+
+
+
+// One alignment of a read to transcript 'trId' with alignment probability
+// 'prob'. Strand tracking was present once and is kept commented out.
+class TagAlignment{
+   protected:
+      long trId;
+//      bool strand; // true = forward; false = reverse
+      long double prob;
+   public:
+      TagAlignment(long t=0,long double p = 0){
+         trId=t;
+//         strand=s;
+         prob=p;
+      }
+      long getTrId()const {return trId;}
+      // NOTE(review): prob is stored as long double but read/written through
+      // double here, silently narrowing precision — confirm this is intended.
+      double getProb()const {return prob;}
+      void setProb(double p){prob=p;}
+}; 
+
+// TagAlignment extended with a second, lower-bound probability 'lowProb'.
+// Initializes the inherited fields directly instead of delegating to the
+// base constructor.
+class TagAlignment2: public TagAlignment {
+   private:
+      long double lowProb;
+   public:
+      //TagAlignment(long t=0,bool s=true,long double p = 0,long double lp = 0){
+      TagAlignment2(long t=0,long double p = 0,long double lp = 0){
+         trId=t;
+//         strand=s;
+         prob=p;
+         lowProb = lp;
+      }
+      // NOTE(review): same long double -> double narrowing as getProb().
+      double getLowProb()const {return lowProb;}
+}; 
+
+
+#endif
diff --git a/transposeFiles.cpp b/transposeFiles.cpp
new file mode 100644
index 0000000..3f15c1a
--- /dev/null
+++ b/transposeFiles.cpp
@@ -0,0 +1,146 @@
+#include<cstdlib>
+#include<fstream>
+#include<iomanip>
+#include<vector>
+
+using namespace std;
+
+#include "FileHeader.h"
+#include "transposeFiles.h"
+
+#include "common.h"
+
+// Merge the sample files 'inFileNames' into 'outFileName', transposing the
+// numeric matrix they contain. Each input starts with a header (parsed by
+// FileHeader::samplesHeader) giving n samples, m transcripts and a 'trans'
+// orientation flag; all inputs must agree on m and the flag. Values are moved
+// as strings (never parsed), in passes of at most BUFFER_DEFAULT rows to bound
+// memory. 'message' is written verbatim into the output header. Returns true
+// on success, false on any open/header error.
+// NOTE(review): the parameter 'message' shadows the message() logging helper
+// from common.h within this scope — presumably the logging calls below still
+// resolve as intended; confirm against common.h.
+bool transposeFiles(vector<string> inFileNames, string outFileName, bool verbose, string message){
+   long M=0,fileN=1,i,j,bufMax,bufN,m,n,totalN,maxN=0,f;
+   bool trans=false,transposed=false;
+   vector<long> N;   // samples per input file
+   bufMax=BUFFER_DEFAULT;
+
+   ofstream outFile(outFileName.c_str());
+   if(!outFile.is_open()){//{{{
+      error("TransposeFile: Unable to open output file\n");
+      // returns false (0) — style differs from the 'return false' used below
+      return 0;
+   }//}}}
+   //{{{ Opening input
+   fileN = inFileNames.size();
+   // NOTE(review): the early 'return false' paths below leak this array
+   // (delete[] only runs on the success path) and leave a partial header in
+   // outFile.
+   ifstream *inFile = new ifstream[fileN];
+   totalN=0;
+   FileHeader fh;
+   for(i=0;i<fileN;i++){
+      inFile[i].open(inFileNames[i].c_str());
+      fh.setFile(&inFile[i]);
+      m = n = 0;
+      if((!fh.samplesHeader(&n,&m,&trans)) || (m == 0) || (n == 0)){
+         error("TransposeFile: Unable to read header of file: %s\n",(inFileNames[i]).c_str());
+         return false;
+      }
+      // First file fixes M and the orientation; later files must match.
+      if(N.size()==0){
+         M=m;
+         transposed=trans;
+         maxN=n;
+      }else if((M!=m)||(transposed!=trans)){
+         error("TransposeFile: Different number of transcripts or file %s is in wrong format.\n",(inFileNames[i]).c_str());
+         return false;
+      }
+      // Record each source file and its sample count in the output header.
+      outFile<<"# "<<inFileNames[i]<<" "<<n<<endl;
+      N.push_back(n);
+      if(n>maxN)maxN=n;
+      totalN+=n;
+   }
+   if(bufMax>M)bufMax=M;
+   //}}}
+
+   outFile<<message;
+   // Output orientation is the opposite of the input's.
+   if(!trans)
+      outFile<<"# T (M rows,N cols)";
+   else 
+      outFile<<"# (N rows,M cols)";
+   outFile<<"\n# M "<<M<<"\n# N "<<totalN<<endl;
+   outFile.precision(9);
+   outFile<<scientific;
+   if(verbose)message("Transposing files:\n Samples: %ld Transcripts: %ld Buffer size: %ld\n",totalN,M,bufMax);
+   if(!trans){ // {{{
+      // Inputs are N rows x M cols; emit M rows x N cols. Per pass: read the
+      // next bufN columns of every row (remembering each row's stream offset
+      // in 'seeks' so the following pass can resume mid-line), then write
+      // them out as bufN complete output rows.
+      vector< vector<long> > seeks(fileN,vector<long>(maxN,-1));
+      vector<vector<string> > valueBuf(bufMax,vector<string>(totalN));
+      long lastBuf = 0, done=0;
+      bufN=bufMax;
+      if(verbose)messageF("(r");
+      for(f=0;f<fileN;f++){
+         for(i=0;i<N[f];i++){
+            for(j=0;j<bufN;j++) inFile[f]>>valueBuf[j][lastBuf];
+            lastBuf++;
+            seeks[f][i]=inFile[f].tellg();
+            inFile[f].ignore(10000000,'\n');
+         }
+      }
+      if(verbose)messageF(">w.");
+      for(j=0;j<bufN;j++){
+         for(i=0;i < lastBuf - 1;i++)
+            outFile<<valueBuf[j][i]<<" ";
+         // Write last value without space.
+         outFile<<valueBuf[j][i]<<endl;
+      }
+      lastBuf=0;
+      done=bufN;
+      // Remaining passes: seek back to each row's saved offset and continue.
+      while(done<M){
+         bufN=bufMax;
+         if(M-done<bufMax)bufN=M-done;
+         if(verbose)messageF("r");
+         for(f=0;f<fileN;f++){
+            for(i=0;i<N[f];i++){
+               inFile[f].seekg(seeks[f][i]);
+               for(j=0;j<bufN;j++) inFile[f]>>valueBuf[j][lastBuf];
+               lastBuf++;
+               seeks[f][i]=inFile[f].tellg();
+            }
+         }
+         if(verbose)messageF(">w.");
+         for(j=0;j<bufN;j++){
+            for(i=0;i < lastBuf - 1;i++)
+               outFile<<valueBuf[j][i]<<" ";
+            // Write last value without space.
+            outFile<<valueBuf[j][i]<<endl;
+         }
+         lastBuf=0;
+         done+=bufN;
+      }
+      for(f=0;f<fileN;f++)inFile[f].close();
+      if(verbose)message(")\n");
+   } // }}}
+   else{ // if(trans) {{{
+      // Inputs are already M rows x ? cols; concatenate them column-wise.
+      // Per pass: read bufN values from each of the M rows (seek offsets per
+      // row), then write bufN output rows of M values each.
+      vector<long> seeks(M,-1);
+      vector<vector<string> > valueBuf(M,vector<string>(bufMax));
+      long done;
+      if(verbose)message("(");
+      for(f=0;f<fileN;f++){
+         seeks.assign(M,-1);
+         done = 0;
+         while(done<N[f]){
+            bufN=bufMax;
+            if(bufN>N[f]-done)bufN=N[f]-done;
+            if(verbose)messageF("r");
+            for(j=0;j<M;j++){
+               if(seeks[j]!=-1)inFile[f].seekg(seeks[j]);
+               for(i=0;i<bufN;i++){
+                  inFile[f]>>valueBuf[j][i];
+               }
+               seeks[j]=inFile[f].tellg();
+               // On the first pass, skip to the start of the next line.
+               if((j+1<M)&&(seeks[j+1]==-1))inFile[f].ignore(100000000,'\n');
+            }
+            if(verbose)messageF(">w.");
+            for(i=0;i<bufN;i++){
+               for(j=0;j < M - 1;j++)
+                  outFile<<valueBuf[j][i]<<" ";
+               // Write last value without space.
+               outFile<<valueBuf[j][i]<<endl;
+            }
+            done+=bufN;
+         }
+         inFile[f].close();      
+      }
+      if(verbose)message(")\n");
+   } //}}}
+   delete[] inFile;
+   outFile.close();
+   return true;
+}
diff --git a/transposeFiles.h b/transposeFiles.h
new file mode 100644
index 0000000..da9ff70
--- /dev/null
+++ b/transposeFiles.h
@@ -0,0 +1,4 @@
+
+// Maximum number of matrix rows buffered in memory per read/write pass.
+#define BUFFER_DEFAULT 20000
+
+// Transpose and merge 'inFileNames' into 'outFileName'; 'message' is written
+// verbatim into the output header. Returns true on success.
+bool transposeFiles(vector<string> inFileNames, string outFileName, bool verbose, string message = "");
diff --git a/transposeLargeFile.cpp b/transposeLargeFile.cpp
new file mode 100644
index 0000000..aba31a9
--- /dev/null
+++ b/transposeLargeFile.cpp
@@ -0,0 +1,22 @@
+#include "ArgumentParser.h"
+#include "transposeFiles.h"
+#include "common.h"
+
+// Command-line front end for transposeFiles(): transposes the [input files]
+// into the file given by -o/--outFile. Exit code 0 on success, 1 on failure.
+int main(int argc,char* argv[]){
+   string programDescription = 
+"Transposes [input files] into [outFileName] so that there are M lines with N columns each.";
+   ArgumentParser args(programDescription,"[input files]",1);
+   args.addOptionS("o","outFile","outFileName",1,"Name of the output file.");
+   // NOTE(review): a parse failure also exits with status 0 — confirm this
+   // matches the other BitSeq tools' convention.
+   if(!args.parse(argc,argv))return 0;
+   if(args.verbose)buildTime(argv[0],__DATE__,__TIME__);
+
+   if(transposeFiles(args.args(),args.getS("outFileName"),args.verbose)){
+      if(args.verbose)message("DONE.\n");
+      return 0;
+   }else{
+      error("Failed.\n");
+      return 1;
+   }
+}
+
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bitseq.git



More information about the debian-med-commit mailing list