[med-svn] [subread] 01/06: Imported Upstream version 1.4.6-p5+dfsg

Alex Mestiashvili malex-guest at moszumanska.debian.org
Sat Sep 5 11:35:39 UTC 2015


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository subread.

commit ea3f6718dc558fe9c146c338a9c652a8bc77e975
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Sat Sep 5 12:30:16 2015 +0200

    Imported Upstream version 1.4.6-p5+dfsg
---
 doc/MDSplot.png                                   |  Bin 0 -> 22947 bytes
 doc/SubreadUsersGuide.bib                         |   73 ++
 doc/SubreadUsersGuide.tex                         | 1219 +++++++++++++++++++++
 doc/indel.png                                     |  Bin 0 -> 108132 bytes
 doc/junction.png                                  |  Bin 0 -> 172344 bytes
 doc/seed-and-vote.png                             |  Bin 0 -> 112767 bytes
 doc/voom_mean_variance.png                        |  Bin 0 -> 99213 bytes
 src/HelperFunctions.c                             |   47 +-
 src/HelperFunctions.h                             |    2 +-
 src/core-junction.c                               |   24 +-
 src/coverage_calc.c                               |   18 +-
 src/makefile.version                              |    2 +-
 src/readSummary.c                                 |  669 ++++++-----
 src/subread.h                                     |    1 +
 test/featureCounts/result/test-minimum.FC         |    9 +
 test/featureCounts/result/test-minimum.FC.summary |   12 +
 16 files changed, 1768 insertions(+), 308 deletions(-)

diff --git a/doc/MDSplot.png b/doc/MDSplot.png
new file mode 100644
index 0000000..a07a5c4
Binary files /dev/null and b/doc/MDSplot.png differ
diff --git a/doc/SubreadUsersGuide.bib b/doc/SubreadUsersGuide.bib
new file mode 100644
index 0000000..7379ac2
--- /dev/null
+++ b/doc/SubreadUsersGuide.bib
@@ -0,0 +1,73 @@
+ at article{liao,
+	author={Liao, Y. and Smyth, G. K. and Shi, W.},
+	year={2013},
+	title={The Subread aligner: fast, accurate and scalable read mapping by seed-and-vote},
+	journal={Nucleic Acids Research},
+	volume={41},
+	issue={10},
+	pages={e108}
+}
+ at article{TangNC2013,
+	author={Tang, K. W. and Alaei-Mahabadi, B. and Samuelsson, T. and Lindh, M. and Larsson, E.},
+	year={2013},
+	title={{The landscape of viral expression and host gene fusion and adaptation in human cancer}},
+	journal={Nature Communications.},
+	volume={2013 Oct 1;4:2513. doi: 10.1038/ncomms3513},
+	pages={}
+}
+ at article{ManNI2013,
+	author={Man, K. and Miasari, M. and Shi, W. and Xin, A. and Henstridge, D. C. and Preston, S. and Pellegrini, M. and Belz, G. T. and Smyth, G. K. and Febbraio, M. A. and Nutt, S. L. and Kallies, A.},
+	year={2013},
+	title={{The transcription factor IRF4 is essential for TCR affinity-mediated metabolic programming and clonal expansion of T cells}},
+	journal={Nature Immunology},
+	volume={2013 Sep 22. doi: 10.1038/ni.2710},
+	pages={}
+}
+ at article{SpangenbergSCR2013,
+	author={Spangenberg, L. and Shigunov, P. and Abud, A. P. and Cofré, A. R. and Stimamiglio, M. A. and Kuligovski, C. and Zych, J. and Schittini, A. V. and Costa, A. D. and Rebelatto, C. K. and Brofman, P. R. and Goldenberg, S. and Correa, A. and Naya, H. and Dallagiovanna, B.},
+	year={2013},
+	title={{Polysome profiling shows extensive posttranscriptional regulation during human adipocyte stem cell differentiation into adipocytes}},
+	journal={Stem Cell Research},
+	volume={11},
+	pages={902-12}
+}
+ at article{tang,
+	author={Tang, J. Z. and Carmichael, C. L. and Shi, W. and Metcalf, D. and Ng, A. P. and Hyland, C. D. and Jenkins, N. A. and Copeland, N. G. and Howell, V. M. and Zhao, Z. J. and Smyth, G. K. and Kile, B. T. and Alexander, W. S.},
+	year={2013},
+	title={{Transposon mutagenesis reveals cooperation of ETS family transcription factors with signaling pathways in erythro-megakaryocytic leukemia}},
+	journal={Proc Natl Acad Sci U S A},
+	volume={110},
+	pages={6091-6}
+}
+ at article{ezh2,
+	author={Pal, B. and Bouras, T. and Shi, W and Vaillant, F. and Sheridan, J. M. and Fu, N. and Breslin, K. and Jiang, K. and Ritchie, M. E. and Young, M. and Lindeman, G. J. and Smyth, G. K. and Visvader, J. E.},
+	year={2013},
+	title={{Global changes in the mammary epigenome are induced by hormonal cues and coordinated by Ezh2}},
+	journal={Cell Reports},
+	volume={3},
+	pages={411-26}
+}
+ at article{fcounts,
+	author={Liao, Y. and Smyth, G. K. and Shi, W.},
+	year={2014},
+	title={{featureCounts: an efficient general-purpose program for assigning sequence reads to genomic features.}},
+	journal={Bioinformatics},
+	volume={30},
+	issue={7},
+	pages={923-30}
+}
+ at article{seqc,
+	author={SEQC/MAQC-III Consortium},
+	year={2014},
+	title={{A comprehensive assessment of RNA-seq accuracy, reproducibility and information content by the Sequencing Quality Control Consortium.}},
+	journal={Nature Biotechnology},
+	volume={32},
+	issue={9},
+	pages={903-14}
+}
+ at article{exactsnp,
+	author={Liao, Y. and Smyth, G. K. and Shi, W.},
+	year={},
+	title={{ExactSNP: an efficient and accurate SNP calling algorithm}},
+	journal={In preparation},
+}
diff --git a/doc/SubreadUsersGuide.tex b/doc/SubreadUsersGuide.tex
new file mode 100644
index 0000000..c93db6e
--- /dev/null
+++ b/doc/SubreadUsersGuide.tex
@@ -0,0 +1,1219 @@
+\documentclass[12pt]{report}
+\usepackage{graphicx,fancyvrb,url,comment,longtable,color,ccaption}
+\usepackage{amsmath}
+
+\textwidth=6.6in
+\textheight=8.7in
+\oddsidemargin=-0.1in
+\evensidemargin=-0.1in
+\headheight=0in
+\headsep=0in
+\topmargin=-0.1in
+
+\DefineVerbatimEnvironment{Rcode}{Verbatim}{fontsize=\footnotesize}
+\newcommand{\code}[1]{{\small\texttt{#1}}}
+\newcommand{\Subread}{\textsf{Subread}}
+\newcommand{\Subjunc}{\textsf{Subjunc}}
+\newcommand{\Rsubread}{\textsf{Rsubread}}
+\newcommand{\ExactSNP}{\textsf{ExactSNP}}
+\newcommand{\limma}{\textsf{limma}}
+\newcommand{\edgeR}{\textsf{edgeR}}
+\newcommand{\DGEList}{\textsf{DGEList}}
+\newcommand{\voom}{\textsf{voom}}
+\newcommand{\featureCounts}{\textsf{featureCounts}}
+\newcommand{\R}{\textsf{R}}
+\newcommand{\C}{\textsf{C}}
+\newcommand{\Rpackage}[1]{\textsf{#1}}
+\excludecomment{hide}
+
+\makeindex
+\begin{document}
+
+\begin{titlepage}
+
+\begin{center}
+{\Huge\bf Subread/Rsubread Users Guide}\\
+\vspace{1 cm}
+{\centering\large Subread v1.4.6-p5/Rsubread v1.19.2\\}
+\vspace{1 cm}
+\centering 28 August 2015\\
+\vspace{5 cm}
+\Large Wei Shi and Yang Liao\\
+\vspace{1 cm}
+\small
+{\large Bioinformatics Division\\
+The Walter and Eliza Hall Institute of Medical Research\\
+The University of Melbourne\\
+Melbourne, Australia\\}
+\vspace{7 cm}
+\centering Copyright \small{\copyright}  2011 - 2015\\
+\end{center}
+
+\end{titlepage}
+
+\tableofcontents
+
+\chapter{Introduction}
+
+The Subread/Rsubread packages comprise a suite of high-performance software programs for processing next-generation sequencing data.
+Included in these packages are \code{Subread} aligner, \code{Subjunc} aligner, \code{Subindel} long indel detection program, \code{featureCounts} read quantification program, \code{exactSNP} SNP calling program and other utility programs.
+This document provides a detailed description to the programs included in the packages.
+
+\code{Subread} and \code{Subjunc} aligners adopt a mapping paradigm called ``seed-and-vote'' \cite{liao}.
+This is an elegantly simple multi-seed strategy for mapping reads to a reference genome. 
+This strategy chooses the mapped genomic location for the read directly from the seeds.
+It uses a relatively large number of short seeds (called subreads) extracted from each read and allows all the seeds to vote on the optimal location.
+When the read length is $<$160 bp, overlapping subreads are used.
+More conventional alignment algorithms are then used to fill in detailed mismatch and indel information between the subreads that make up the winning voting block.
+The strategy is fast because the overall genomic location has already been chosen before the detailed alignment is done.
+It is sensitive because no individual subread is required to map exactly, nor are individual subreads constrained to map close by other subreads.
+It is accurate because the final location must be supported by several different subreads. The strategy extends easily to find exon junctions, by locating reads that contain sets of subreads mapping to different exons of the same gene.
+It scales up efficiently for longer reads.
+
+\code{Subread} is a general-purpose read aligner.
+It can be used to align reads generated from both genomic DNA sequencing and RNA sequencing technologies.
+It been successfully used in a number of high-profile studies \cite{TangNC2013,ManNI2013,SpangenbergSCR2013,tang,ezh2}.
+\code{Subjunc} is specifically designed to detect exon-exon junctions and to perform full alignments for RNA-seq reads.
+Note that \code{Subread} performs local alignments for RNA-seq reads, whereas \code{Subjunc} performs global alignments for RNA-seq reads.
+Both \code{Subread} and \code{Subjunc} detect insertions, deletions, fusions and these genomic events are reported in the read mapping results.
+\code{Subjunc} also reports all the splice sites discovered from the exon-spanning reads.
+It re-aligns all the reads using the compiled list of splice sites in its last step of read alignment, and it can detect splice sites in any location of the reads.
+
+The \code{Subindel} program carries out local read assembly to discover long insertions and deletions.
+Read mapping should be performed before running this program.
+
+The {\featureCounts} program is designed to assign mapped reads or fragments (paired-end data) to genomic features such as genes, exons and promoters.
+It is a light-weight read counting program suitable for count both gDNA-seq and RNA-seq reads for genomic features\cite{fcounts}.
+The \textsf{Subread-featureCounts-limma/voom} pipeline has been found to be one of the best-performing pipelines for the analyses of RNA-seq data by the SEquencing Quality Control (SEQC) study, the third stage of the well-known MicroArray Quality Control (MAQC) project \cite{seqc}.
+
+Also included in this software suite is a very efficient SNP caller -- {\ExactSNP}.
+{\ExactSNP} measures local background noise for each candidate SNP and then uses that information to accurately call SNPs.
+
+These software programs support a variety of sequencing platforms including Illumina GA/HiSeq, ABI SOLiD, Life Science 454, Helicos Heliscope and Ion Torrent. They are released in two packages -- SourceForge \emph{Subread} package and Bioconductor \emph{Rsubread} package.
+
+\chapter{Preliminaries}
+
+\section{Citation}
+
+If you use {\Subread} or {\Subjunc} aligners, please cite:\\
+
+\begin{quote}
+Liao Y, Smyth GK and Shi W. The Subread aligner: fast, accurate and scalable read mapping by seed-and-vote. Nucleic Acids Research, 41(10):e108, 2013
+\\
+{\color{blue}{\url{http://www.ncbi.nlm.nih.gov/pubmed/23558742}} }
+\end{quote}
+
+If you use \featureCounts, please cite:\\
+\begin{quote}
+Liao Y, Smyth GK and Shi W. featureCounts: an efficient general-purpose program for assigning sequence reads to genomic features. Bioinformatics, 2013 Nov 30. [Epub ahead of print]
+\\
+{\color{blue}{\url{http://www.ncbi.nlm.nih.gov/pubmed/24227677}}}
+\end{quote}
+
+
+\section{Download and installation}
+\subsection{SourceForge {\Subread} package}
+
+\subsubsection{Installation from a binary distribution}
+
+This is the easiest way to install the {\Subread} package onto your computer.
+Download a {\Subread} binary distribution that suits your oprating system, from the SourceForge website {\color{blue}{\url{http://subread.sourceforge.net}}}. The operating systems currently being supported include multiple variants of Linux (Debian, Ubuntu, Fedora and Cent OS) and Mac OS X. Both 64-bit and 32-bit machines are supported. The executables can be found in the `bin' diretory of the binary package.
+
+To install {\Subread} package for other operating systems such as FreeBSD and Solaris, you will have to install them for the source.
+
+
+\subsubsection{Installation from the source package}
+
+Download {\Subread} source package to your working directory from SourceForge \\
+{\color{blue}{\url{http://subread.sourceforge.net}}}, and type the following command to uncompress it:\\
+
+\code{tar zxvf subread-1.x.x.tar.gz}\\
+
+Enter \code{src} directory of the package and issue the following command to install it on a Linux operating system: \\
+
+\code{make -f Makefile.Linux}\\
+
+To install it on a Mac OS X operating system, issue the following command:\\
+
+\code{make -f Makefile.MacOS}\\
+
+To install it on a FreeBSD operating system, issue the following command:\\
+
+\code{make -f Makefile.FreeBSD}\\
+
+To install it on Oracle Solaris or OpenSolaris computer operating systems, issue the following command:\\
+
+\code{make -f Makefile.SunOS}\\
+
+
+A new directory called \code{bin} will be created under the home directory of the software package, and the executables generated from the compilation are saved to that directory.
+To enable easy access to these executables, you may copy them to a system directory such as \code{/usr/bin} or add the path to them to your search path (your search path is usually specified in the environment variable \code{`PATH'}).
+
+
+\subsection{Bioconductor {\Rsubread} package}
+
+You have to get {\R} installed on my computer to install this package.
+Lauch an {\R} session and issue the following command to install it:
+
+\begin{Rcode}
+source("http://bioconductor.org/biocLite.R")
+biocLite("Rsubread")
+\end{Rcode}
+
+Alternatively, you may download the {\Rsubread} source package directly from {\color{blue}{\url{http://bioconductor.org/packages/release/bioc/html/Rsubread.html}} } and install it to your {\R} from the source.
+
+
+\section{How to get help}
+
+Bioconductor mailing list (\url{http://bioconductor.org/}) and SeqAnswer forum (\url{http://www.seqanswers.com}) are the best places to get help and to report bugs.
+Alternatively, you may contact Wei Shi (shi at wehi dot edu dot au) directly.
+
+\chapter{The seed-and-vote mapping paradigm}
+
+\section{Seed-and-vote}
+
+We have developed a new read mapping paradigm called ``seed-and-vote" for efficient, accurate and scalable read mapping \cite{liao}.
+The seed-and-vote strategy uses a number of overlapping seeds from each read, called \emph{subreads}.
+Instead of trying to pick the best seed, the strategy allows all the seeds to vote on the optimal location for the read.
+The algorithm then uses more conventional alignment algorithms to fill in detailed mismatch and indel information between the subreads that make up the winning voting block. 
+The following figure illustrates the proposed seed-and-vote mapping approach with an toy example.
+
+\begin{center}
+\includegraphics[scale=0.3]{seed-and-vote.png}
+\end{center}
+
+Two aligners have been developed under the seed-and-vote paradigm, including \code{Subread} and \code{Subjunc}.
+\code{Subread} is a general-purpose read aligner, which can be used to map both genomic DNA-seq and RNA-seq read data.
+Its running time is determined by the number of \emph{subreads} extracted from each read, not by the read length.
+Thus it has an excellent maping scalability, ie. its running time has only very modest increase with the increase of read length.
+
+\code{Subread} uses the largest mappable region in the read to determine its mapping location, therefore it automatically determines whether a global alignment or a local alignment should be found for the read.
+For the exon-spanning reads in a RNA-seq dataset, \code{Subread} performs local alignments for them to find the target regions in the reference genome that have the largest overlap with them.
+Note that \code{Subread} does not perform global alignments for the exon-spanning reads and it soft clips those read bases which could not be mapped.
+However, the \code{Subread} mapping result is sufficient for carrying out the gene-level expression analysis using RNA-seq data, because the mapped read bases can be reliably used to assign reads, including both exonic reads and exon-spanning reads, to genes.
+
+To get the full alignments for exon-spanning RNA-seq reads, the \code{Subjunc} aligner can be used.
+\code{Subjunc} is designd to discover exon-exon junctions from using RNA-seq data, but it performs full alignments for all the reads at the same time. 
+The \code{Subjunc} mapping results should be used for detecting genomic variations in RNA-seq data, allele-specific expression analysis and exon-level gene expression analysis.
+The Section~\ref{sec:junction} describes how exon-exon junctions are discovered and how exon-spanning reads are aligned using the seed-and-vote paradigm.
+
+\section{Indel detection}
+\subsection{Detection of short indels}
+
+\begin{center}
+\includegraphics[scale=0.3]{indel.png}
+\end{center}
+
+The seed-and-vote paradigm is very powerful in detecting indels (insertions and deletions).
+The figure below shows how we use the \emph{subreads} to confidently detect short indels.
+When there is an indel existing in a read, mapping locations of subreads extracted after the indel will be shifted to the left (insertion) or to the right (deletion), relative to the mapping locations of subreads at the left side of the indel.
+Therefore, indels in the reads can be readily detected by examining the difference in mapping locations of the extracted subreads.
+Moreover, the number of bases by which the mapping location of subreads are shifted gives the precise length of the indel.
+Since no mismatches are allowed in the mapping of the subreads, the indels can be detected with a very high accuracy.
+
+\subsection{Detection of long indels}
+
+Detection of long indels is conducted by performing local read assembly.
+When the specified indel length (`-I' option in SourceForge \code{C} or `indels' paradigm in \code{Rsubread}) is greater than 16, the \code{Subread} and \code{Subjunc} will automatically start the read assembly procedure to identify indels of up to 200bp long.
+\code{Subindel} outputs the assembled contig sequences that contain the detected long insertions and/or deletions and also the CIGAR info for the indels.
+
+\section{Detection of canonical exon-exon junctions}
+\label{sec:junction}
+
+The seed-and-vote paradigm is also very useful in detecting exon-exon junctions, because the short subreads extracted across the entire read can be used to detect short exons in a sensitive and accurate way.
+The figure below shows the schematic of detecting exon-exon junctions and mapping RNA-seq reads by \code{Subjunc}, which uses this paradigm.
+
+The first scan detects all possible exon-exon junctions using the mapping locations of the subreads extracted from each read.
+Matched donor (`GT') and receptor (`AG') sites are required for calling junctions.
+Exons as short as 16bp can be detected in this step.
+The second scan verifies the putative exon-exon junctions discovered from the first scan by performing re-alignments for the junction reads.
+The output from \code{Subjunc} includes the list of verified junctions and also the mapping results for all the reads.
+Orientation of splicing sites is indicated by `XA' tag in section of optional fields in mapping output.
+
+\begin{center}
+\includegraphics[scale=0.5]{junction.png}
+\end{center}
+
+\section{Fusion detection}
+
+\code{Subjunc} can detect genomic fusion events such as chimera in both RNA sequencing and genomic DNA sequencing data.
+It performs fusion detection in a manner similar to what it does for exon-exon junction detection, but it allows the same read to be splitted across different chromosomes.
+It also allows a read to be splitted across different strands on the same chromosome.
+It does not require donor/receptor sites when calling fusions.
+Non-canonical exon-exon junctions, which have donor/receptor sites other than GT/AG, may also be reported when using \code{subjunc} to detect fusions.
+
+If a read is found to (i) map to two or more chromosomes, or (ii) map to different strands of the same chromosome or (iii) to span a regions wider than $2^{27}$ bases, \code{Subjunc} will use optional fields in the SAM/BAM output file to report the secondary alignments of the read.
+The primary alignment of the read is saved in the main fields of the same record.
+The following tags are used for secondary alignments in the optional fields: 
+CC(chromosome name), CP(mapping position), CG(CIGAR string) and CT(strand).
+
+
+\section{Read re-alignments}
+
+Both \code{Subread} and \code{Subjunc} aligners re-align the reads after identifying indels, fusions and exon-exon junctions (subjunc only) from the data.
+They make use of the flanking window approach to identify indels, fusions and exon junctions.
+This is a highly accurate approach since it requires the identified indels, fusions or exon junctions to be flanked by perfectly matched subreads (16mers) at both sides.
+These discovered indels, fusions and exon junctions are then used to re-align the reads.
+Indels, fusions and exon junctions that are located very close to read ends can also be found during the re-alignment.
+We will remove those indels, fusions and junctions if they were found not to be supported by any read after read re-alignment.
+Numbers of reads supporting these genomic events will be reported.
+
+\section{Recommended alignment setting}
+
+It is recommended to turn on \code{-u} option (reporting uniquely mapped reads only) and also \code{-H} option (breaking ties using Hamming distance), when running \code{Subread} and \code{Subjunc} aligners.
+This should give the most accurate mapping results with little or no cost to the mapping percentage.
+This is the default setting used in \code{align} and \code{subjunc} functions in {\Rsubread} package (\code{unique=TRUE} and \code{tieBreakHamming=TRUE}).
+
+
+\chapter{Mapping reads generated by genomic DNA sequencing technologies}
+\label{chapter:subread-dnaseq}
+
+\section{A quick start for using SourceForge {\Subread} package}
+
+An index must be built for the reference first and then the read mapping can be performed.\\
+
+{\noindent\bf Step 1: Build an index}\\
+
+\noindent Build a base-space index (default). You can provide a list of FASTA files or a single FASTA file including all the reference sequences.\\
+
+\code{subread-buildindex -o my\_index chr1.fa chr2.fa ...}\\
+
+{\noindent\bf Step 2: Align reads}\\
+
+\noindent Map single-end reads from a gzipped file using 5 threads and save mapping results to a BAM file:\\
+\code{subread-align -T 5 -i my\_index --gzFASTQinput --BAMoutput}\\
+\code{-r reads.txt.gz -o subread\_results.bam}\\
+
+\noindent Detect indels of up to 16bp:\\
+\code{subread-align -I 16 -i my\_index -r reads.txt -o subread\_results.sam}\\
+
+\noindent Report up to three best mapping locations:\\
+\code{subread-align -B 3 -i my\_index -r reads.txt -o subread\_results.sam}\\
+
+\noindent Report uniquely mapped reads only:\\
+\code{subread-align -u -i my\_index -r reads.txt -o subread\_results.sam}\\
+
+\noindent Map paired-end reads:\\
+\code{subread-align -d 50 -D 600 -i my\_index -r reads1.txt -R reads2.txt \newline -o subread\_results.sam}\\
+
+
+\section{A quick start for using Bioconductor {\Rsubread} package}
+
+An index must be built for the reference first and then the read mapping can be performed.\\
+
+{\noindent\bf Step 1: Building an index}\\
+
+\noindent To build the index, you must provide a single FASTA file (eg. ``genome.fa'') which includes all the reference sequences.
+
+\begin{Rcode}
+library(Rsubread)
+buildindex(basename="my_index",reference="genome.fa")
+\end{Rcode}
+
+{\noindent\bf Step 2: Aligning the reads}\\
+
+\noindent Map single-end reads using 5 threads:
+\begin{Rcode}
+align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",nthreads=5)
+\end{Rcode}
+
+\noindent Detect indels of up to 16bp:
+\begin{Rcode}
+align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",indels=16)
+\end{Rcode}
+
+\noindent Report up to three best mapping locations:
+\begin{Rcode}
+align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",nBestLocations=3)
+\end{Rcode}
+
+\noindent Report uniquely mapped reads only:
+\begin{Rcode}
+align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",unique=TRUE)
+\end{Rcode}
+
+\noindent Map paired-end reads:
+\begin{Rcode}
+align(index="my_index",readfile1="reads1.txt.gz",readfile2="reads2.txt.gz",
+output_file="rsubread.bam",minFragLength=50,maxFragLength=600)
+\end{Rcode}
+
+
+\section{Index building}
+\label{sec:index}
+
+The \code{subread-buildindex} (\code{buildindex} function in \Rsubread) program builds an base-space or color-space index using the reference sequences.
+The reference sequences should be in FASTA format (the header line for each chromosomal sequence starts with ``$>$'').\\
+
+This program extracts all the 16 mer sequences from the reference genome at a 2bp interval and then uses them to build a hash table.
+Keys in the hash table are unique 16 mers and values are their chromosomal locations.
+Table 1 describes the arguments used by the \code{subread-buildindex} program.
+
+\begin{table}[h]
+\raggedright{Table 1: Arguments used by the \code{subread-buildindex} program (\code{buildindex} function in \Rsubread).
+Arguments in parenthesis in the first column are used by \code{buildindex}.\newline\\}
+\begin{tabular}{|p{4cm}|p{12cm}|}
+\hline
+Arguments & Description \\
+\hline
+chr1.fa, chr2.fa, ... \newline (\code{reference}) & Give names of chromosome files. Note that in {\Rsubread}, only a single FASTA file including all reference sequences should be provided.\\
+\hline
+-B \newline (\code{indexSplit=FALSE}) & Create one block of index.  The built index will not be split into multiple pieces. This makes the largest amount of memory be requested when running alignments, but it enables the maximum mapping speed to be achieved. This option overrides -M when it is provided as well.\\
+\hline
+-c \newline (\code{colorspace}) & Build a color-space index.\\
+\hline
+-f $<int>$ \newline (\code{TH\_subread}) & Specify the threshold for removing uninformative subreads (highly repetitive 16mers). Subreads will be excluded from the index if they occur more than threshold number of times in the reference genome. Default value is 24.\\
+\hline
+-F \newline (\code{gappedIndex=FALSE}) & Build a full index for the reference genome. 16bp mers (subreads) will be extracted from every position of the reference genome. Under default setting (`-F' is not specified), subreads are extracted in every three bases from the genome.\\
+\hline
+-M $<int>$ \newline (\code{memory}) & Specify the Size of requested memory(RAM) in megabytes, 8000MB by default. With the default value, the index built for a mammalian genome (eg. human or mouse genome) will be saved into one block, enabling the fastest mapping speed to be achieved. The amount of memory used is $\sim$ 7600MB for mouse or human genome (other species have a much smaller memory footprint), when performing read mapping. Using less memory will increase read mapping time.\\
+\hline
+-o $<basename>$ \newline (\code{basename}) & Specify the base name of the index to be created.\\
+\hline
+-v & Output version of the program. \\
+\hline
+\end{tabular}
+\end{table}
+
+\newpage
+
+\section{Read mapping}
+
+The \texttt{subread-align} program (\code{align} in \Rsubread) extracts a number of subreads from each read and then uses these subreads to vote for the mapping location of the read.
+It uses the the ``seed-and-vote'' paradigm for read mapping.
+\code{subread-align} program automatically determines if a read should be globally aligned or locally aligned, making it particularly poweful for mapping RNA-seq reads.
+Table 2 describes the arguments used by the \code{subread-align} program (and also the \code{subjunc} program).
+These arguments are used by the read mapping programs included in both SourceForge \code{Subread} package and Bioconductor \code{Rsubread} package, although argument names are different in these two packages (arguments names used by Bioconductor \code{Rsubread} are included in parenthesis).
+
+\newpage
+
+\begin{longtable}{|p{4cm}|p{12cm}|}
+\multicolumn{2}{p{16cm}}{Table 2: Arguments used by the \code{subread-align}/\code{subjunc} programs included in the SourceForge {\Subread} package.
+Arguments in parenthesis in the first column are the equivalent arguments used in Bioconductor {\Rsubread} package.
+Arguments used by \code{subread-align} only are marked with `$^*$' and arguments used by \code{subjunc} only are marked with `$^{**}$'.
+\newline
+}
+\endfirsthead
+\hline
+Arguments & Description \\
+\hline
+-b \newline (\code{color2base=TRUE}) & Output base-space reads instead of color-space reads in mapping output for color space data (eg. LifTech SOLiD data). Note that the mapping itself will still be performed at color-space.\\
+\hline
+-B $<int>$ \newline (\code{nBestLocations}) & Specify the maximal number of equally-best mapping locations allowed to be reported for each read. 1 by default. Allowed values are between 1 to 16 (inclusive). `NH' tag is used to indicate how many alignments are reported for the read and `HI' tag is used for numbering the alignments reported for the same read, in the output. Note that \code{-u} option takes precedence over \code{-B}.\\
+\hline
+-d $<int>$ \newline (\code{minFragLength}) & Specify the minimum fragment/template length, 50 by default.  Note that if the two reads from the same pair do not satisfy the fragment length criteria, they will be mapped individually as if they were single-end reads.\\
+\hline
+-D $<int>$ \newline (\code{maxFragLength}) & Specify the maximum fragment/template length, 600 by default.\\
+\hline
+$^*$ -E $<int>$ \newline (\code{DP\_GapExtPenalty}) & Specify the penalty for extending the gap when performing the Smith-Waterman dynamic programming. 0 by defaut.\\
+\hline
+$^*$ -G $<int>$ \newline (\code{DP\_GapOpenPenalty}) & Specify the penalty for opening a gap when applying the Smith-Waterman dynamic programming to detecting indels. -2 by defaut.\\
+\hline
+-H \newline (\code{tieBreakHamming =TRUE}) & Use Hamming distance to break ties when more than one best mapping location is found.\\
+\hline
+-i $<index> \newline (\code{index}) $ & Specify the base name of the index.\\
+\hline
+-I $<int>$ \newline (\code{indels}) & Specify the number of INDEL bases allowed in the mapping. 5 by default. Indels of up to 200bp long can be detected.\\
+\hline
+-m  $<int>$ \newline (\code{TH1}) & Specify the consensus threshold, which is the minimal number of consensus subreads required for reporting a hit. The consensus subreads are those subreads which vote for the same location in the reference genome for the read. If pair-end read data are provided, at least one of the two reads from the same pair must satisfy this criteria. 3 by default.\\
+\hline
+-M $<int>$ \newline (\code{maxMismatches}) & Specify the maximum number of mis-matched bases allowed in the alignment. 3 by default. Mis-matches found in soft-clipped bases are not counted.\\
+\hline
+-n $<int>$ \newline (\code{nsubreads}) & Specify the number of subreads extracted from each read, 10 by default.\\
+\hline
+-o $<output>$ \newline (\code{output\_file}) & Give the name of output file. Default output format in SourceForge {\Subread} is SAM, and default output format in Bioconductor {\Rsubread} is BAM. All reads are included in mapping output, including both mapped and unmapped reads, and they are in the same order as in the input file.\\
+\hline
+-p $<int>$ \newline (\code{TH2}) & Specify the minimum number of consensus subreads both reads from the same pair must have. This argument is only applicable for paired-end read data. The value of this argument should not be greater than that of `-m' option, so as to rescue those read pairs in which one read has a high mapping quality but the other does not. 1 by default.\\
+\hline
+-P $<3:6>$ \newline (\code{phredOffset}) & Specify the format of Phred scores used in the input data, '3' for phred+33 and '6' for phred+64. '3' by default. For \code{align} function in \Rsubread, the possible values are `33' (for phred+33) and `64' (for phred+64). `33' by default.\\
+\hline
+-Q \newline (\code{tieBreakQS=TRUE}) & Use mapping quality scores to break ties when more than one best mapping location is found.\\
+\hline
+-r $<input>$ \newline (\code{readfile1}) & Give the name of input file(s) (multiple files are allowed to be provided to \code{align} and \code{subjunc} functions in {\Rsubread}). For paired-end read data, this gives the first read file and the other read file should be provided via the -R option. Supported input formats include FASTQ/FASTA, gzipped FASTQ/FASTA, SAM and BAM. Default input format in SourceForge {\Subread} is FASTQ/FASTA and default input format in Bioconductor {\Rsubread}  [...]
+\hline
+-R $<input>$ \newline (\code{readfile2}) & Provide name of the second read file from paired-end data. The program will switch to paired-end read mapping mode if this file is provided. (multiple files are allowed to be provided to \code{align} and \code{subjunc} functions in {\Rsubread}).\\
+\hline
+-S $<ff:fr:rf>$ \newline (\code{PE\_orientation}) & Specify the orientation of the two reads from the same pair. It has three possible values including `fr', `ff' and `'rf. Letter `f' denotes the forward strand and letter `r' the reverse strand. `fr' by default (ie. the first read in the pair is on the forward strand and the second read on the reverse strand).\\
+\hline
+-T $<int>$ \newline (\code{nthreads}) & Specify the number of threads/CPUs used for mapping. The value should be between 1 and 32. 1 by default.\\
+\hline
+-u \newline (\code{unique=TRUE}) & Output uniquely mapped reads only. Reads that were found to have more than one best mapping location will not be reported.\\
+\hline
+$^*$ -X $<int>$ \newline (\code{DP\_MismatchPenalty}) & Specify the penalty for mismatches when performing the Smith-Waterman dynamic programming.  0 by defaut.\\
+\hline
+$^*$ -Y $<int>$ \newline (\code{DP\_MatchScore}) & Specify the score for the matched base when performing the Smith-Waterman dynamic programming.  2 by defaut.\\
+\hline
+$^{**}$$--$allJunctions \newline (\code{reportAllJunctions =TRUE}) & This option should only be used with \code{subjunc} for RNA-seq data. If specified, \code{subjunc} will report non-canonical exon-exon junctions and structural variants, in addition to the canonical exon-exon junctions (canonical donor/receptor sites detected). Both canonical and non-canonical junctions will be output to a file with a suffix name ".junction.bed". Locations of breakpoints for structural variants will be  [...]
+\hline
+$--$BAMinput \newline (\code{input\_format="BAM"}) & Specify that the input read data are in BAM format.\\
+\hline
+$--$BAMoutput \newline (\code{output\_format="BAM"}) & Specify that mapping results are saved into a BAM format file. \\
+\hline
+$^{**}$$--$dnaseq \newline (\code{DNAseq=TRUE}) & This option should only be used with \code{subjunc} for genomic DNA sequencing data. If specified, \code{subjunc} will search for long deletions, inversions and translocations in the read data. Locations of breakpoints for these structural variants will be saved to a file with a suffix name ".breakpoints.txt". Breakpoint-containing reads may include the following fields for their minor mapped regions in mapping output: CC(Chr), CP(Positio [...]
+\hline
+$--$gzFASTQinput \newline (\code{input\_format=} \newline \code{"gzFASTQ"}) & Specify that the input read data are in gzipped FASTQ or gzipped FASTA format.\\
+\hline
+$--$minDistance BetweenVariants $<int>$ & Minimum allowed distance between two neighboring variants (or junctions in RNA-seq data) within the same read. 16 by default. The value should be greater than 0 and less than the length of the read.\\
+\hline
+$^*$$--$reportFusions \newline (\code{reportFusions=TRUE}) & This option should only be used with \code{subread-align} for the mapping of genomic DNA-seq data. If specified, \code{subread-align} will report discovered fusion events such as chimeras. Fusions are reported in the same format as that used in `$--$dnaseq' option.\\
+\hline
+$--$rg $<string>$ \newline (\code{readGroup}) & Add a $<tag:value>$ to the read group (RG) header in the mapping output. \\
+\hline
+$--$rg-id $<string>$ \newline (\code{readGroupID}) & Specify the read group ID. If specified, the read group ID will be added to the read group header field and also to each read in the mapping output. \\
+\hline
+$--$SAMinput \newline (\code{input\_format="SAM"}) & Specify that the input read data are in SAM format.\\
+\hline
+$--$trim5 $<int>$ \newline (\code{nTrim5}) & Trim off $<int>$ number of bases from 5' end of each read. 0 by default.\\
+\hline
+$--$trim3 $<int>$ \newline (\code{nTrim3}) & Trim off $<int>$ number of bases from 3' end of each read. 0 by default.\\
+\hline
+-v & Output version of the program. \\
+\hline
+\end{longtable}
+
+\newpage
+
+\section{Mapping quality scores}
+
+Both {\Subread} and {\Subjunc} aligners output a mapping quality score (MQS) for each mapped read,
+computed as
+
+\[ MQS = \left\{
+\begin{array}{l l}
+(\sum_{i \in b_m} ( 1 - p_i) - \sum_{i \in b_{mm}} (1 - p_i)) \times 60 / L & \quad \text{if uniquely mapped}\\
+& \quad \text{\scriptsize{[MQS is reset to 0 if less than 0]}}\\
+& \\
+0 & \quad \text{if mapped to $>1$ best location}\\
+\end{array} \right.\]
+
+where $L$ is the read length, $p_i$ is the base-calling $p$-value for the $i$th base in the read, $b_m$ is the set of locations of matched bases, and $b_{mm}$ is the set of locations of mismatched bases.
+
+Base-calling p values can be readily computed from the base quality scores.
+Read bases of high sequencing quality have low base-calling p values.
+Read bases that were found to be insertions are treated as matched bases in the MQS calculation.
+The MQS is a read-length normalized value and it is in the range [0, 60).
+
+\section{Mapping output}
+
+Read mapping results for each library will be saved to a SAM or BAM format file.
+A text file, which includes discovered insertions and deletions, will also be generated for each library (`*.indel').
+If \code{subread-align} was run with the parameter `--reportFusions' or \code{subjunc} was run with the parameter `--dnaseq', then the breakpoints detected from structural variant events will be output to a text file for each library as well.
+
+\newpage
+
+
+
+\chapter{Mapping reads generated by RNA sequencing technologies}
+
+\section{A quick start for using SourceForge {\Subread} package}
+
+\label{sec:rnaseq-subread}
+An index must be built for the reference first and then the read mapping and/or junction detection can be carried out.\\
+
+{\noindent\bf Step 1: Building an index}\\
+
+\noindent The following command can be used to build a base-space index.
+You can provide a list of FASTA files or a single FASTA file including all the reference sequences.\\
+
+\code{subread-buildindex -o my\_index chr1.fa chr2.fa ...}\\
+
+\noindent For more details about index building, see Section~\ref{sec:index}.\\
+
+{\noindent\bf Step 2: Aligning the reads}\\
+
+\noindent{{\Subread}}\\
+
+\noindent For the purpose of differential expression analysis (ie. discovering differentially expressed genes), we recommend you to use the {\Subread} aligner.
+{\Subread} carries out local alignments for RNA-seq reads.
+The commands used by {\Subread} to align RNA-seq reads are the same as those used to align gDNA-seq reads.
+Below is an example of using {\Subread} to map single-end RNA-seq reads.\\
+
+\code{subread-align -i my\_index -r rnaseq-reads.txt -o subread\_results.sam}\\
+
+\noindent Another RNA-seq aligner included in this package is the {\Subjunc} aligner.
+{\Subjunc} not only performs read alignments but also detects exon-exon junctions.
+The main difference between {\Subread} and {\Subjunc} is that {\Subread} does not attempt to detect exon-exon junctions in the RNA-seq reads.
+For the alignments of the exon-spanning reads, {\Subread} just uses the largest mappable regions in the reads to find their mapping locations.
+This makes {\Subread} more computationally efficient.
+The largest mappable regions can then be used to reliably assign the reads to their target genes by using a read summarization program (eg. \featureCounts, see Section~\ref{sec:featureCounts}), and differential expression analysis can be readily performed based on the read counts yielded from read summarization.
+Therefore, {\Subread} is sufficient for read mapping if the purpose of the RNA-seq analysis is to perform a differential expression analysis. 
+Also, {\Subread} could report more mapped reads than {\Subjunc}.
+For example, the exon-spanning reads that are not aligned by {\Subjunc} due to the lack of GT/AG splicing signals (this is the only donor/receptor site accepted by {\Subjunc}) could be aligned by {\Subread}, as long as they have a good match with the target region.\\
+
+\noindent{{\Subjunc}}\\
+
+For other purposes of the RNA-seq data anlayses such as exon-exon junction detection and genomic mutation detection, in which reads need to be fully aligned (especially the exon-spanning reads), {\Subjunc} aligner should be used.
+Below is an example command of using {\Subjunc} to perform global alignments for paired-end RNA-seq reads.
+Note that there are two files included in the output: one containing the discovered exon-exon junctions (BED format) and the other containing the mapping results for reads (SAM or BAM format).\\
+
+\code{subjunc -i my\_index -r rnaseq-reads1.txt -R rnaseq-reads2.txt -o subjunc\_result}
+
+\section{A quick start for using Bioconductor {\Rsubread} package}
+
+An index must be built for the reference first and then the read mapping can be performed.\\
+
+{\noindent\bf Step 1: Building an index}\\
+
+\noindent To build the index, you must provide a single FASTA file (eg. ``genome.fa'') which includes all the reference sequences.
+
+\begin{Rcode}
+library(Rsubread)
+buildindex(basename="my_index",reference="genome.fa")
+\end{Rcode}
+
+{\noindent\bf Step 2: Aligning the reads}\\
+
+Please refer to Section~\ref{sec:rnaseq-subread} for difference between {\Subread} and {\Subjunc} in mapping RNA-seq data.
+Below is an example for mapping a single-end RNA-seq dataset using {\Subread}.
+Useful information about \code{align} function can be found in its help page (type \code{?align} in your {\R} prompt).
+
+\begin{Rcode}
+align(index="my_index",readfile1="rnaseq-reads.txt.gz",output_file="subread_results.bam")
+\end{Rcode}
+
+Below is an example for mapping a single-end RNA-seq dataset using {\Subjunc}.
+Useful information about \code{subjunc} function can be found in its help page (type \code{?subjunc} in your {\R} prompt).
+
+\begin{Rcode}
+subjunc(index="my_index",readfile1="rnaseq-reads.txt.gz",output_file="subjunc_results.bam")
+\end{Rcode}
+
+
+\section{Local read alignment}
+
+The \code{Subread} and \code{Subjunc} can both be used to map RNA-seq reads to the reference genome.
+If the goal of the RNA-seq data is to perform expression analysis, eg. finding genes expressing differentially between different conditions, then \code{Subread} is recommended.
+\code{Subread} performs fast local alignments for reads and reports the mapping locations that have the largest overlap with the reads.
+These reads can then be assigned to genes for expression analysis.
+For this type of analysis, global alignments for the exon-spanning reads are not required because local aligments are sufficient to get reads to be accurately assigned to genes.
+
+However, for other types of RNA-seq data analyses such as exon-exon junction discovery, genomic mutation detection and allele-specific gene expression analysis, global alignments are required.
+The next section describes the {\Subjunc} aligner, which performs global aligments for RNA-seq reads.
+ 
+\section{Global read alignment}
+
+{\Subjunc} aligns each exon-spanning read by firstly using a large number of subreads extracted from the read to identify multiple target regions matching the selected subreads, and then using the splicing signals (donor and receptor sites) to precisely determine the mapping locations of the read bases.
+It also includes a verification step to compare the quality of mapping reads as exon-spanning reads with the quality of mapping reads as exonic reads to finally decide how to best map the reads.
+Reads may be re-aligned if required.
+
+Output of {\Subjunc} aligner includes a list of discovered exon-exon junction locations and also the complete alignment results for the reads.
+Table 2 describes the arguments used by the {\Subjunc} program.\\
+
+\section{Mapping output}
+
+Read mapping results for each library will be saved to a SAM or BAM format file.
+The detected exon-exon junctions will be saved to a text file for each library (`*.junction.bed').
+Detected indels will be saved to a text file for each library as well (`*.indel').\\
+
+If the `--allJunctions' argument was specified when running \code{subjunc}, the breakpoints detected from structural variant events will also be saved to a text file for each library (`*.fusion.txt').
+A read gives rise to such a breakpoint if \\
+(1) one part of its sequence maps to one chromosome and the other part maps to a different chromosome (the two parts are separated by the breakpoint), or\\
+(2) both parts map to the same chromosome but to different strands, or\\
+(3) both parts map to the same strand on the same chromosome but their relative position swapped (ie. the right-side part map to the left of the left-side part).
+
+Reads falling into one of the above conditions will contain additional fields in the mapping output, including CC(Chr), CP(Position),CG(CIGAR) and CT(strand). 
+Note that exon-spanning reads that span an intron of $>$500kb long will also contain such additional fields (splicing points detected from these reads are saved in the `*.junction.bed' file).
+
+If none of the above conditions was satisfied, the detected breakpoints will be put into the `*.junction.bed' file, indicating that the breakpoints arise from exon-exon splicing events.
+
+
+\section{Mapping microRNA sequencing reads (miRNA-seq)}
+
+To use {\Subread} aligner to map miRNA-seq reads, a full index must be built for the reference genome before read mapping can be carried out.
+For example, the following command builds a full index for mouse reference genome \emph{mm10}:
+
+\code{\\
+subread-buildindex -F -B -f 72 -o mm10\_full\_index mm10.fa \\
+}
+
+The full index includes 16bp mers extracted from every genomic location in the genome.
+Note that if \code{-F} is not specified, \code{subread-buildindex} builds a gapped index which includes 16bp mers extracted every three bases in the reference genome, ie. there is a 2bp gap between each pair of neighbouring 16bp mers.
+
+After the full index was built, read alignment can be performed.
+Reads do not need to be trimmed before feeding them to {\Subread} aligner since {\Subread} soft clips sequences in the reads that can not be properly mapped.
+The parameters used for mapping miRNA-seq reads need to be carefully designed due to the very short length of miRNA sequences ($\sim$22bp).
+The total number of subreads (16bp mers) extracted from each read should be the read length minus 15, which
+is the maximum number of subreads that can be possibly extracted from a read.
+The reason why we need to extract the maximum number of subreads is to achieve a high sensitivity in detecting the short miRNA sequences.
+
+The threshold for the number of consensus subreads required for reporting a hit should be within the range of 2 to 7 consensus subreads inclusive.
+The larger the number of consensus subreads required, the more stringent the mapping will be.
+Using a threshold of 2 consensus subreads allows the detection of miRNA sequences of as short as 17bp, but the mapping error rate could be relatively high.
+With this threshold, there will be at least 17 perfectly matched bases present in each reported alignment.
+If a threshold of 4 consensus subreads was used, length of miRNA sequences that can be detected is 19 bp or longer.
+With this threshold, there will be at least 19 perfectly matched bases present in each reported alignment.
+When a threshold of 7 consensus subreads was used, only miRNA sequences of 22bp or longer can be detected (at least 22 perfectly matched bases will be present in each reported alignment).
+
+We found that there was a significant decrease in the number of mapped reads when the requried number of consensus subreads increased from 4 to 5 when we tried to align a mouse miRNA-seq dataset, suggesting that there are a lot of miRNA sequences that are only 19bp long.
+We therefore used a threshold of 4 consensus subreads to map this dataset.
+However, what we observed might not be the case for other datasets that were generated from different cell types and different species.
+
+Below is an example of mapping 50bp long reads (adaptor sequences were included in the reads in addition to the miRNA sequences), with at least 4 consensus subreads required in the mapping:
+
+\code{\\
+subread-align -i mm10\_full\_index -n 35 -m 4 -M 3 -T 10 -I 0 -P 3 -B 10 \\
+--BAMoutput -r miRNA\_reads.fastq -o result.sam\\
+}
+
+The `-B 10' parameter instructs {\Subread} aligner to report up to 10 best mapping locations (equally best) in the mapping results.
+The multiple locations reported for the reads could be useful for investigating their true origin, but they might need to be filtered out when assigning mapped reads to known miRNA genes to ensure a high-quality quantification of miRNA genes.
+The miRBase database (\url{http://www.mirbase.org/}) is a useful resource that includes annotations for miRNA genes in many species.
+The {\featureCounts} program can be readily used for summarizing reads to miRNA genes.
+
+\chapter{Read summarization}
+
+\section{Introduction}
+
+Sequencing reads often need to be assigned to genomic features of interest after they are mapped to the reference genome.
+This process is often called \emph{read summarization} or \emph{read quantification}.
+Read summarization is required by a number of downstream analyses such as gene expression analysis and histone modification analysis.
+The output of read summarization is a count table, in which the number of reads assigned to each feature in each library is recorded.
+
+A particular challenge to the read summarization is how to deal with those reads that overlap more than one feature (eg. an exon) or meta-feature (eg. a gene).
+Care must be taken to ensure that such reads are not over-counted or under-counted.
+Here we describe the {\featureCounts} program, an efficient and accurate read quantifier.
+{\featureCounts} has the following features:
+\begin{itemize}
+\item It carries out precise and accurate read assignments by taking care of indels, junctions and fusions in the reads.
+\item It takes less than 4 minutes to summarize 20 million pairs of reads to 26k RefSeq genes using one thread, and uses $<$20MB of memory (you can run it on a Mac laptop).
+\item It supports multi-threaded running, making it extremely fast for summarizing large datasets.
+\item It supports GTF/SAF format annotation and SAM/BAM read data.
+\item It supports strand-specific read summarization.
+\item It can perform read summarization at both feature level (eg. exon level) and meta-feature level (eg. gene level).
+\item It allows users to specify whether reads overlapping with more than one feature should be counted or not.
+\item It gives users full control on the summarization of paired-end reads, including allowing them to check if both ends are mapped and/or if the fragment length falls within the specified range.
+\item It can discriminate the features that were overlapped by both ends of the fragment from the features that were overlapped by only one end of the same fragment to get more accurate read assignments.
+\item It allows users to specify whether chimeric fragments should be counted.
+\item It automatically detects the read input format (SAM or BAM).
+\item It automatically re-order paired-end reads if reads belonging to the same pair are not adjacent to each other in input read files.
+\end{itemize}
+
+\section{featureCounts}
+\label{sec:featureCounts}
+
+\subsection{Input data}
+
+The data input to {\featureCounts} consists of (i) one or more files of aligned reads in either SAM or BAM format and (ii) a list of genomic features in either Gene Transfer Format (GTF) or General Feature Format (GFF) or Simplified Annotation Format (SAF). The read input format (SAM or BAM) is automatically detected and so does not need to be specified by the user. For paired reads, {\featureCounts} also automatically sorts reads by name if paired reads are not in consecutive positions  [...]
+
+The genomic features can be specified in either GTF/GFF or SAF format. The SAF format is the simpler and includes only five required columns for each feature (see next section). In either format, the feature identifiers are assumed to be unique, in accordance with commonly used Gene Transfer Format (GTF) refinement of GFF.
+
+{\featureCounts} supports strand-specific read counting if strand-specific information is provided. Read mapping results usually include mapping quality scores for mapped reads. Users can optionally specify a minimum mapping quality score that the assigned reads must satisfy.
+
+\subsection{Annotation format}
+\label{sec:annotation}
+
+The genomic features can be specified in either GTF/GFF or SAF format.
+A definition of the GTF format can be found at UCSC website (\url{http://genome.ucsc.edu/FAQ/FAQformat.html#format4}).
+The SAF format includes five required columns for each feature: feature identifier, chromosome name, start position, end position and strand.
+These five columns provide the minimal sufficient information for read quantification purposes.
+Extra annotation data are allowed to be added from the sixth column. 
+
+A SAF-format annotation file should be a tab-delimited text file.
+It should also include a header line.
+An example of a SAF annotation is shown as below:
+
+\code{\\
+GeneID	Chr	Start	End	Strand\\
+497097	chr1	3204563	3207049	-\\
+497097	chr1	3411783	3411982	-\\
+497097	chr1	3660633	3661579	-\\
+100503874	chr1	3637390	3640590	-\\
+100503874	chr1	3648928	3648985	-\\
+100038431	chr1	3670236	3671869	-\\
+...
+}
+
+\code{GeneID} column includes gene identifiers that can be numbers or character strings.
+Chromosomal names included in the \code{Chr} column must match the chromosomal names of reference sequences to which the reads were aligned.
+
+\subsection{Single and paired-end reads}
+
+Reads may be paired or unpaired.
+If paired reads are used, then each pair of reads defines a DNA or RNA fragment bookended by the two reads.
+In this case, {\featureCounts} can be instructed to count fragments rather than reads.
+{\featureCounts} automatically sorts reads by name if paired reads are not in consecutive positions in the SAM or BAM file.
+Users do not need sort their paired reads before providing them to {\featureCounts}.
+
+\subsection{Features and meta-features}
+
+{\featureCounts} is a general-purpose read summarization function, which assigns mapped reads (RNA-seq reads or genomic DNA-seq reads) to genomic features or meta-features.
+Each feature is an interval (range of positions) on one of the reference sequences. We define a meta-feature to be a set of features representing a biological construct of interest. For example, features often correspond to exons and meta-features to genes. Features sharing the same feature identifier in the GTF or SAF annotation are taken to belong to the same meta-feature. {\featureCounts} can summarize reads at either the feature or meta-feature levels.
+
+We recommend to use unique gene identifiers, such as NCBI Entrez gene identifiers, to cluster features into meta-features. Gene names are not recommended to use for this purpose because different genes may have the same names. Unique gene identifiers were often included in many publicly available GTF annotations which can be readily used for summarization. The Bioconductor {\Rsubread} package also includes NCBI RefSeq annotations for human and mice. Entrez gene identifiers are used in th [...]
+
+\subsection{Overlap of reads with features}
+
+{\featureCounts} preforms precise read assignment by comparing mapping location of every base in the read or fragment with the genomic region spanned by each feature. It takes account of any gaps (insertions, deletions, exon-exon junctions or fusions) that are found in the read. It calls a hit if any overlap (1bp or more) is found between the read or fragment and a feature.
+A hit is called for a meta-feature if the read or fragment overlaps any component feature of the meta-feature.
+
+\subsection{Multiple overlaps}
+
+A multi-overlap read or fragment is one that overlaps more than one feature, or more than one meta-feature when summarizing at the meta-feature level. {\featureCounts} provides users with the option to either exclude multi-overlap reads or to count them for each feature that is overlapped. The decision whether or not to counting these reads is often determined by the experiment type. We recommend that reads or fragments overlapping more than one gene are not counted for RNA-seq experimen [...]
+
+Note that, when counting at the meta-feature level, reads that overlap multiple features of the same meta-feature are always counted exactly once for that meta-feature, provided there is no overlap with any other meta-feature. For example, an exon-spanning read will be counted only once for the corresponding gene even if it overlaps with more than one exon.
+
+\subsection{In-built annotations}
+
+In-built gene annotations for genomes \emph{hg19}, \emph{mm10} and \emph{mm9} are included in both Bioconductor {\Rsubread} package and SourceForge {\Subread} package.
+These annotations were downloaded from NCBI RefSeq database and then adapted by merging overlapping exons from the same gene to form a set of disjoint exons for each gene.
+Genes with the same Entrez gene identifiers were also merged into one gene.
+
+Each row in the annotation represents an exon of a gene. There are five columns in the annotation data including Entrez gene identifier (\emph{GeneID}), chromosomal name (\emph{Chr}), chromosomal start position(\emph{Start}), chromosomal end position (\emph{End}) and strand (\emph{Strand}).
+
+In {\Rsubread}, users can access these annotations via the {\textsf{getInBuiltAnnotation}} function.
+In {\Subread}, these annotations are stored in directory `annotation' under home directory of the package.
+
+\subsection{Program output}
+
+Output of {\featureCounts} program in SourceForge {\Subread} package is saved into a tab-delimited file, which includes annotation columns (`Geneid', `Chr', `Start', `End', `Strand' and `Length') and data columns (read counts for each gene in each library).
+Annotation column `Length' contains total number of non-overlapping bases of each feature or meta-feature.
+When for example summarizing RNA-seq reads to genes, this column will give total number of non-overlapping bases included in all exons belonging to the same gene, for each gene.
+
+When performing summarization at meta-feature level, annotation columns including `Chr', `Start', `End', `Strand' and `Length' give the annotation information for every feature included each meta-features.
+Therefore, each of these columns may include more than one value (semi-colon separated).
+
+Output of {\featureCounts} program in SourceForge {\Subread} package also includes stat info of summarization results, which is saved to a tab-delimited file as well (a separate file).
+This file gives the total number of reads that are successfully assigned and also numbers of reads that are not assigned due to various reasons.
+Below lists the reasons why reads may not be assigned:
+\begin{itemize}
+\item Unassigned\_Ambiguity: overlapping with two or more features (feature-level summarization) or meta-features (meta-feature-level) summarization.
+\item Unassigned\_MultiMapping: reads marked as multi-mapping in SAM/BAM input (the `NH' tag is checked by the program).
+\item Unassigned\_NoFeatures: not overlapping with any features included in the annotation. 	
+\item Unassigned\_Unmapped: reads are reported as unmapped in SAM/BAM input. Note that if the `--primary' option of featureCounts program is specified, the read marked as a primary alignment will be considered for assigning to features.
+\item Unassigned\_MappingQuality: mapping quality scores lower than the specified threshold.
+\item Unassigned\_FragementLength: length of fragment does not satisfy the criteria.
+\item Unassigned\_Chimera: two reads from the same pair are mapped to different chromosomes or have incorrect orientation.
+\item Unassigned\_Secondary: reads marked as second alignment in the FLAG field in SAM/BAM input. 	
+\item Unassigned\_Nonjunction: reads do not span two or more exons. Such reads will not be assigned if the `--countSplitAlignmentsOnly' option is specified.
+\item Unassigned\_Duplicate: reads marked as duplicate in the FLAG field in SAM/BAM input.
+\end{itemize}
+
+All these output were also provided by the {\featureCounts} function included in Bioconductor {\Rsubread} package, except that read summarization results are saved into an {\R} `List' object.
+For more details, see the help page for {\featureCounts} function in {\Rsubread}.
+
+
+\subsection{Program usage}
+
+Table 3 describes the parameters used by the {\featureCounts} program.
+
+\pagebreak
+
+\begin{longtable}{|p{5cm}|p{11cm}|}
+\multicolumn{2}{p{16cm}}{Table 3: arguments used by the {\featureCounts} program included in the SourceForge {\Subread} package.
+Arguments included in parenthesis are the equivalent parameters used by {\featureCounts} function in Bioconductor {\Rsubread} package.}
+\endfirsthead
+\hline
+Arguments & Description \\
+\hline
+input\_files \newline (\code{files}) & Give the names of input read files that include the read mapping results. The program automatically detects the file format (SAM or BAM). Multiple files can be provided at the same time.\\
+\hline
+-a $<input> \newline (\code{annot.ext, annot.inbuilt})  $ & Give the name of an annotation file. \\
+\hline
+-A \newline (\code{chrAliases}) & Give the name of a file that contains aliases of chromosome names. The file should be a comma delimited text file that includes two columns. The first column gives the chromosome names used in the annotation and the second column gives the chromosome names used by reads. This file should not contain header lines. Names included in this file are case sensitive.\\
+\hline
+-B \newline (\code{requireBothEndsMapped}) & If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option should be used together with \code{-p} (or \code{isPairedEnd} in {\Rsubread} {\featureCounts}).\\
+\hline
+-C \newline (\code{countChimericFragments}) & If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be counted. This option should be used together with \code{-p} (or \code{isPairedEnd} in {\Rsubread} {\featureCounts}).\\
+\hline
+-d $<int>$ \newline (\code{minFragLength}) & Minimum fragment/template length, 50 by default. This option must be used together with \code{-p} and \code{-P}.\\
+\hline
+-D $<int>$ \newline (\code{maxFragLength}) & Maximum fragment/template length, 600 by default. This option must be used together with \code{-p} and \code{-P}.\\
+\hline
+-f \newline (\code{useMetaFeatures}) & If specified, read summarization will be performed at feature level (eg. exon level). Otherwise, it is performed at meta-feature level (eg. gene level).\\
+\hline
+-F \newline (\code{isGTFAnnotationFile}) & Specify the format of the annotation file. Acceptable formats include `GTF' and `SAF' (see Section~\ref{sec:annotation} for details). The {\C} version of {\featureCounts} program uses a GTF format annotation by default, but the R version uses a SAF format annotation by default. The R version also includes in-built annotations.\\
+\hline
+-g $<input>$ \newline (\code{GTF.attrType}) & Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes) when GTF annotation is provided. `gene\_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization.\\
+\hline
+-M \newline (\code{countMultiMappingReads}) & If specified, multi-mapping reads/fragments will be counted. A multi-mapping read will be counted up to N times if it has N reported mapping locations. The program uses the `NH' tag to find multi-mapping reads.\\
+\hline
+-o $<input>$ & Give the name of the output file. The output file contains the number of reads assigned to each meta-feature (or each feature if \code{-f} is specified). Note that the {\featureCounts} function in {\Rsubread} does not use this parameter. It returns a \code{list} object including read summarization results and other data. \\
+\hline
+-O \newline (\code{allowMultiOverlap}) & If specified, reads (or fragments if \code{-p} is specified) will be allowed to be assigned to more than one matched meta-feature (or feature if \code{-f} is specified). Reads/fragments overlapping with more than one meta-feature/feature will be counted more than once. Note that when performing meta-feature level summarization, a read (or fragment) will still be counted once if it overlaps with multiple features belonging to the same meta-feature  [...]
+\hline
+-p \newline (\code{isPairedEnd}) & If specified, fragments (or templates) will be counted instead of reads. This option is only applicable for paired-end reads.\\
+\hline
+-P \newline (\code{checkFragLength}) & If specified, the fragment length will be checked when assigning fragments to meta-features or features. This option must be used together with \code{-p}. The fragment length thresholds should be specified using \code{-d} and \code{-D} options.\\
+\hline
+-Q $<int>$ \newline (\code{minMQS}) & The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 0 by default.\\
+\hline
+-R & Output read assignment results for each read (or fragment if paired end). They are saved to a tab-delimited file that contains four columns including read name, status(assigned or the reason if not assigned), name of target feature/meta-feature and number of hits if the read/fragment is counted multiple times. Name of the file is the input file name added with a suffix `.featureCounts'.\\
+\hline
+-s $<int>$ \newline (\code{isStrandSpecific}) & Indicate if strand-specific read counting should be performed. It has three possible values:  0 (unstranded), 1 (stranded) and 2 (reversely stranded). 0 by default. For paired-end reads, strand of the first read is taken as the strand of the whole fragment and FLAG field of the current read is used to tell if it is the first read in the fragment.\\
+\hline
+-S $<ff:fr:rf>$ & Specify the orientation of the two reads from the same pair. It has three possible values including `fr', `ff' and `'rf. Letter `f' denotes the forward strand and letter `r' the reverse strand. `fr' by default (ie. the first read in the pair is on the forward strand and the second read on the reverse strand).\\
+\hline
+-t $<input>$ \newline (\code{GTF.featureType}) & Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default.\\
+\hline
+-T $<int>$ \newline (\code{nthreads}) & Number of the threads. The value should be between 1 and 32. 1 by default.\\
+\hline
+-v & Output version of the program. \\
+\hline
+$--$countSplitAlignmentsOnly \newline (\code{countSplitAlignmentsOnly}) & If specified, only split alignments (CIGAR strings containing letter `N') will be counted. All the other alignments will be ignored. An example of split alignments is the exon-spanning reads in RNA-seq data. If exon-spanning reads need to be assigned to all their overlapping exons, `-f' and `-O' options should be provided as well.\\
+\hline
+$--$donotsort \newline (\code{autosort}) & If specified, paired end reads will not be re-ordered even if reads from the same pair were found not to be next to each other in the input.\\
+\hline
+$--$fraction \newline (\code{fraction}) & If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indicated by `NH' tag) reported for the read. This option must be used together with the `-M' option.\\
+\hline
+$--$ignoreDup \newline (\code{ignoreDup}) & If specified, reads that were marked as duplicates will be ignored. Bit Ox400 in FLAG field of SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read.\\
+\hline
+$--$largestOverlap \newline (\code{largestOverlap}) & If specified, reads (or fragments) will be assigned to the target that has the largest number of overlapping bases.\\
+\hline
+$--$minOverlap $<int>$ \newline (\code{minOverlap}) & Specify the minimum required number of overlapping bases between a read (or a fragment) and an overlapping feature. 1 by default. If a negative value is provided, the read will be extended from both ends.\\
+\hline
+$--$primary \newline (\code{countPrimaryAlignmentsOnly}) & If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the Flag field of SAM/BAM files. All primary alignments in a dataset will be counted no matter they are from multi-mapping reads or not (ie. `-M' is ignored).\\
+\hline
+$--$read2pos $<int>$ \newline (\code{read2pos}) & The read is reduced to its 5' most base or 3' most base. Read summarization is then performed based on the single base position to which the read is reduced. By default, no read reduction will be performed.\\
+\hline
+$--$readExtension5 $<int>$ \newline (\code{readExtension5}) & Reads are extended upstream by $<int>$ bases from their 5' end. 0 by default.\\
+\hline
+$--$readExtension3 $<int>$ \newline (\code{readExtension3}) & Reads are extended downstream by $<int>$ bases from their 3' end. 0 by default.\\
+\hline
+\end{longtable}
+
+\pagebreak
+
+\section{A quick start for {\featureCounts} in SourceForge \Subread}
+
+You need to provide read mapping results (in either SAM or BAM format) and an annotation file for the read summarization.
+The example commands below assume your annotation file is in GTF format.\\
+
+\noindent Summarize SAM format single-end reads using 5 threads:\\
+
+\noindent\code{featureCounts -T 5 -a annotation.gtf -t exon -g gene\_id \\
+ -o counts.txt mapping\_results\_SE.sam}\\
+
+\noindent Summarize BAM format single-end read data:\\
+
+\noindent\code{featureCounts -a annotation.gtf -t exon -g gene\_id \\
+ -o counts.txt mapping\_results\_SE.bam}\\
+
+\noindent Summarize multiple libraries at the same time:\\
+
+\noindent\code{featureCounts -a annotation.gtf -t exon -g gene\_id \\
+ -o counts.txt mapping\_results1.bam mapping\_results2.bam}\\
+
+\noindent Summarize paired-end reads and count fragments (instead of reads):\\
+
+\noindent\code{featureCounts -p -a annotation.gtf -t exon -g gene\_id \\
+-o counts.txt mapping\_results\_PE.bam}\\
+
+\noindent Count fragments satisfying the fragment length criteria, eg. [50bp, 600bp]:\\
+
+\noindent\code{featureCounts -p -P -d 50 -D 600 -a annotation.gtf -t exon -g gene\_id\\
+-o counts.txt mapping\_results\_PE.bam}\\
+
+\noindent Count fragments which have both ends successfully aligned without considering the fragment length constraint:\\
+
+\noindent\code{featureCounts -p -B -a annotation.gtf -t exon -g gene\_id\\
+ -o counts.txt mapping\_results\_PE.bam}\\
+
+\noindent Exclude chimeric fragments from the fragment counting:\\
+
+\noindent\code{featureCounts -p -C -a annotation.gtf -t exon -g gene\_id\\
+ -o counts.txt mapping\_results\_PE.bam}
+
+
+\section{A quick start for {\featureCounts} in Bioconductor \Rsubread}
+
+You need to provide read mapping results (in either SAM or BAM format) and an annotation file for the read summarization.
+The example commands below assume your annotation file is in GTF format.\\
+
+\noindent Load {\Rsubread} library from you {\R} session:
+
+\begin{Rcode}
+library(Rsubread)
+\end{Rcode}
+
+\noindent Summarize single-end reads using built-in RefSeq annotation for mouse genome mm9:
+\begin{Rcode}
+featureCounts(files="mapping_results_SE.sam",annot.inbuilt="mm9")
+\end{Rcode}
+
+\noindent Summarize single-end reads using a user-provided GTF annotation file:
+
+\begin{Rcode}
+featureCounts(files="mapping_results_SE.sam",annot.ext="annotation.gtf",
+isGTFAnnotationFile=TRUE,GTF.featureType="exon",GTF.attrType="gene_id")
+\end{Rcode}
+
+\noindent Summarize single-end reads using 5 threads:
+
+\begin{Rcode}
+featureCounts(files="mapping_results_SE.sam",nthreads=5)
+\end{Rcode}
+
+\noindent Summarize BAM format single-end read data:
+
+\begin{Rcode}
+featureCounts(files="mapping_results_SE.bam")
+\end{Rcode}
+
+\noindent Summarize multiple libraries at the same time:
+
+\begin{Rcode}
+featureCounts(files=c("mapping_results1.bam","mapping_results2.bam"))
+\end{Rcode}
+
+\noindent Summarize paired-end reads and counting fragments (instead of reads):
+
+\begin{Rcode}
+featureCounts(files="mapping_results_PE.bam",isPairedEnd=TRUE)
+\end{Rcode}
+
+\noindent Count fragments satisfying the fragment length criteria, eg. [50bp, 600bp]:
+
+\begin{Rcode}
+featureCounts(files="mapping_results_PE.bam",isPairedEnd=TRUE,checkFragLength=TRUE,
+minFragLength=50,maxFragLength=600)
+\end{Rcode}
+
+\noindent Count fragments which have both ends successfully aligned without considering the fragment length constraint:
+
+\begin{Rcode}
+featureCounts(files="mapping_results_PE.bam",isPairedEnd=TRUE,requireBothEndsMapped=TRUE)
+\end{Rcode}
+
+\noindent Exclude chimeric fragments from fragment counting:
+
+\begin{Rcode}
+featureCounts(files="mapping_results_PE.bam",isPairedEnd=TRUE,countChimericFragments=FALSE)
+\end{Rcode}
+
+\chapter{SNP calling}
+
+\section{Algorithm}
+
+SNPs(Single Nucleotide Polymorphisms) are the mutations of single nucleotides in the genome.
+It has been reported that many diseases were initiated and/or driven by such mutations.
+Therefore, successful detection of SNPs is very useful in designing better diagnosis and treatments for a variety of diseases such as cancer.
+SNP detection also is an important subject of many population studies.
+
+Next-gen sequencing technologies provide an unprecedented opportunity to identify SNPs at the highest resolution.
+However, it is extremely computing-intensive to analyze the data generated from these technologies for the purpose of SNP discovery  because of the sheer volume of the data and the large number of chromosomal locations to be considered.
+To discover SNPs, reads need to be mapped to the reference genome first and then all the read data mapped to a particular site will be used for SNP calling for that site.
+Discovery of SNPs is often confounded by many sources of errors.
+Mapping errors and sequencing errors are often the major sources of errors causing incorrect SNP calling.
+Incorrect alignments of indels, exon-exon junctions and fusions in the reads can also result in wrong placement of blocks of continuous read bases, likely giving rise to consecutive incorrectly reported SNPs.
+
+We have developed a highly accurate and efficient SNP caller, called \emph{exactSNP} \cite{exactSNP}.
+\emph{exactSNP} calls SNPs for individual samples, without requiring control samples to be provided.
+It tests the statistical significance of SNPs by comparing SNP signals to their background noises.
+It has been found to be an order of magnitude faster than existing SNP callers.
+
+\section{exactSNP}
+
+Below is the command for running \code{exactSNP} program.
+The complete list of parameters used by \code{exactSNP} can be found in Table 4.\\
+
+\noindent\code{exactSNP [options] -i input -g reference\_genome -o output}\\
+
+
+\begin{longtable}{|p{4.5cm}|p{11cm}|}
+\multicolumn{2}{p{16cm}}{Table 4: arguments used by the \code{exactSNP} program included in the SourceForge {\Subread} package. 
+Arguments included in parenthesis are the equivalent parameters used by \code{exactSNP} function in Bioconductor {\Rsubread} package.}
+\endfirsthead
+\hline
+Arguments & Description \\
+\hline
+-a $<file>$ \newline (SNPAnnotationFile) & Specify name of a VCF-format file that includes annotated SNPs. Such annotation files can be downloaded from public databases such as the dbSNP database. Incorporating known SNPs into SNP calling has been found to be helpful. However note that the annotated SNPs may or may not be called for the sample being analyzed. \\
+\hline
+-b \newline (isBAM) & Indicate the input file provided via $-i$ is in BAM format. \\
+\hline
+-f $<float>$ \newline (minAllelicFraction) & Specify the minimum fraction of mis-matched bases a SNP-containing location must have. Its value must between 0 and 1. 0 by default. \\
+\hline
+-g $<file>$ \newline (refGenomeFile) & Specify name of the file including all reference sequences. Only one single FASTA format file should be provided. \\
+\hline
+-i $<file> [-b\ if\ BAM] \newline (readFile)$ & Specify name of an input file including read mapping results. The format of input file can be SAM or BAM  (\code{-b} needs to be specified if a BAM file is provided).\\
+\hline
+-n $<int>$ \newline (minAllelicBases) & Specify the minimum number of mis-matched bases a SNP-containing location must have. 1 by default.\\
+\hline
+-o $<file>$ \newline (outputFile) & Specify name of the output file. This program outputs a VCF format file that includes discovered SNPs. \\
+\hline
+-Q $<int>$  \newline (qvalueCutoff) &  Specify the q-value cutoff for SNP calling at sequencing depth of 50X. 12 by default. The corresponding p-value cutoff is $10^{-Q}$. Note that this program automatically adjusts the q-value cutoff according to the sequencing depth at each chromosomal location.\\
+\hline
+-r $<int>$ \newline (minReads) & Specify the minimum number of mapped reads a SNP-containing location must have (ie. the minimum coverage). 1 by default. \\
+\hline
+-s $<int>$ \newline (minBaseQuality) & Specify the cutoff for base calling quality scores (Phred scores) read bases must satisfy to be used for SNP calling. 13 by default. Read bases that have Phred scores lower than the cutoff value will be excluded from the analysis. \\
+\hline
+-t $<int>$ \newline (nTrimmedBases) & Specify the number of bases trimmed off from each end of the read. 3 by default. \\
+\hline
+-T $<int>$ \newline (nthreads) & Specify the number of threads. 1 by default. \\
+\hline
+-v & Output version of the program. \\
+\hline
+-x $<int>$ \newline (maxReads) & Specify the maximum number of mapped reads a SNP-containing location could have. 3000 by default. Any location having more than the threshold number of reads will not be considered for SNP calling. This option is useful for removing PCR artefacts. \\
+\hline
+\end{longtable}
+
+
+
+\chapter{Case studies}
+
+\section{A Bioconductor R pipeline for analyzing RNA-seq data}
+
+Here we illustrate how to use two Bioconductor packages - {\Rsubread} and {\limma} - to perform a complete RNA-seq analysis, including {\Subread} read mapping, {\featureCounts} read summarization, {\voom} normalization and {\limma} differential expresssion analysis.\\
+
+{\noindent\bf Data and software.} The RNA-seq data used in this case study include four libraries: A\_1, A\_2, B\_1 and B\_2.
+Sample A is Universal Human Reference RNA (UHRR) and sample B is Human Brain Reference RNA (HBRR).
+A\_1 and A\_2 are two replicates of sample A (undergoing separate sample preparation), and B\_1 and B\_2 are two replicates of sample B.
+In this case study, A\_1 and A\_2 are treated as biological replicates although they are more like technical replicates.
+B\_1 and B\_2 are treated as biological replicates as well.
+
+Note that these libraries only included reads originating from human chromosome 1 (according to {\Subread} aligner).
+Reads were generated by the MAQC/SEQC Consortium.
+Data used in this case study can be downloaded from\\
+\url{http://bioinf.wehi.edu.au/RNAseqCaseStudy/data.tar.gz} (283MB).
+Both read data and reference sequence for chromosome 1 of human genome (GRCh37) were included in the data.
+
+After downloading the data, you can uncompress it and save it to your current working directory.
+Launch {\R} and load {\Rsubread}, {\limma} and {\edgeR} libraries by issuing the following commands at your R prompt.
+Version of your {\R} should be 3.0.2 or later.
+{\Rsubread} version should be 1.12.1 or later and {\limma} version should be 3.18.0 or later.
+Note that this case study only runs on Linux/Unix and Mac OS X.
+
+\begin{Rcode}
+library(Rsubread)
+library(limma)
+library(edgeR)
+\end{Rcode}
+
+To install/update {\Rsubread} and {\limma} packages, issue the following commands at your R prompt:
+\begin{Rcode}
+source("http://bioconductor.org/biocLite.R")
+biocLite(pkgs=c("Rsubread","limma","edgeR"))
+\end{Rcode}
+
+{\noindent\bf Index building.} Build an index for human chromosome 1. This typically takes $\sim$3 minutes. Index files with basename `chr1' will be generated in your current working directory.
+
+\begin{Rcode}
+buildindex(basename="chr1",reference="hg19_chr1.fa")
+\end{Rcode}
+
+{\noindent\bf Alignment.} Perform read alignment for all four libraries and report uniquely mapped reads only. This typically takes $\sim$5 minutes. BAM files containing the mapping results will be generated in your current working directory.
+
+\begin{Rcode}
+targets <- readTargets()
+align(index="chr1",readfile1=targets$InputFile,input_format="gzFASTQ",output_format="BAM",
+output_file=targets$OutputFile,tieBreakHamming=TRUE,unique=TRUE,indels=5)
+\end{Rcode}
+
+{\noindent\bf Read summarization.} Summarize mapped reads to NCBI RefSeq genes.
+This will only take a few seconds.
+Note that the {\featureCounts} function contains built-in RefSeq annotations for human and mouse genes.
+{\featureCounts} returns an {\R} `List' object, which includes raw read count for each gene in each library and also annotation information such as gene identifiers and gene lengths.
+
+\begin{Rcode}
+fc <- featureCounts(files=targets$OutputFile,annot.inbuilt="hg19")
+
+fc$counts[1:5,]
+          A_1.bam A_2.bam B_1.bam B_2.bam
+653635        642     522     591     596
+100422834       1       0       0       0
+645520          5       3       0       0
+79501           0       0       0       0
+729737         82      72      30      25
+
+fc$annotation[1:5,c("GeneID","Length")]
+     GeneID Length
+1    653635   1769
+2 100422834    138
+3    645520   1130
+4     79501    918
+5    729737   3402
+\end{Rcode}
+
+Create a {\DGEList} object.
+\begin{Rcode}
+x <- DGEList(counts=fc$counts, genes=fc$annotation[,c("GeneID","Length")])
+\end{Rcode}
+
+Calculate RPKM (reads per kilobases of exon per million reads mapped) values for genes:
+\begin{Rcode}
+x_rpkm <- rpkm(x,x$genes$Length,prior.count=0)
+
+x_rpkm[1:5,]
+          A_1.bam A_2.bam B_1.bam B_2.bam
+653635        939   905.0     709     736
+100422834      19     0.0       0       0
+645520         11     8.1       0       0
+79501           0     0.0       0       0
+729737         62    64.9      19      16
+\end{Rcode}
+
+
+{\noindent\bf Filtering.} Only keep in the analysis those genes which had $>$10 reads per million mapped reads in at least two libraries.
+
+\begin{Rcode}
+isexpr <- rowSums(cpm(x) > 10) >= 2
+x <- x[isexpr,]
+\end{Rcode}
+
+{\noindent\bf Design matrix.} Create a design matrix: 
+
+\begin{Rcode}
+celltype <- factor(targets$CellType)
+design <- model.matrix(~0+celltype)
+colnames(design) <- levels(celltype)
+\end{Rcode}
+
+{\noindent\bf Normalization.} Perform {\voom} normalization: 
+
+\begin{Rcode}
+y <- voom(x,design,plot=TRUE)
+\end{Rcode}
+
+The figure below shows the mean-variance relationship estimated by voom.
+\begin{center}
+\includegraphics[scale=0.3]{voom_mean_variance.png}
+\end{center}
+
+{\noindent\bf Sample clustering.} Multi-dimensional scaling (MDS) plot shows that sample A libraries are clearly separated from sample B libraries.
+
+\begin{Rcode}
+plotMDS(y,xlim=c(-2.5,2.5))
+\end{Rcode}
+
+\begin{center}
+\includegraphics[scale=0.5]{MDSplot.png}
+\end{center}
+
+{\noindent\bf Linear model fitting and differential expression analysis.} Fit linear models to genes and assess differential expression using eBayes moderated t statistic.
+Here we compare sample B vs sample A.
+
+\begin{Rcode}
+fit <- lmFit(y,design)
+contr <- makeContrasts(BvsA=B-A,levels=design)
+fit.contr <- eBayes(contrasts.fit(fit,contr))
+dt <- decideTests(fit.contr)
+summary(dt)
+   BvsA
+-1  922
+0   333
+1   537
+\end{Rcode}
+
+List top 10 differentially expressed genes: 
+
+\begin{Rcode}
+options(digits=2)
+topTable(fit.contr)
+             GeneID Length logFC AveExpr   t P.Value adj.P.Val  B
+100131754 100131754   1019   1.6      16 113 3.5e-28   6.3e-25 54
+2023           2023   1812  -2.7      13 -91 2.2e-26   1.9e-23 51
+2752           2752   4950   2.4      13  82 1.5e-25   9.1e-23 49
+22883         22883   5192   2.3      12  64 1.8e-23   7.9e-21 44
+6135           6135    609  -2.2      12 -62 3.1e-23   9.5e-21 44
+6202           6202    705  -2.4      12 -62 3.2e-23   9.5e-21 44
+4904           4904   1546  -3.0      11 -60 5.5e-23   1.4e-20 43
+23154         23154   3705   3.7      11  55 2.9e-22   6.6e-20 41
+8682           8682   2469   2.6      12  49 2.2e-21   4.3e-19 39
+6125           6125   1031  -2.0      12 -48 3.1e-21   5.6e-19 39
+\end{Rcode}
+
+
+\bibliographystyle{unsrt}
+\bibliography{SubreadUsersGuide}
+
+
+\end{document}
diff --git a/doc/indel.png b/doc/indel.png
new file mode 100644
index 0000000..659d7c2
Binary files /dev/null and b/doc/indel.png differ
diff --git a/doc/junction.png b/doc/junction.png
new file mode 100644
index 0000000..0fe274d
Binary files /dev/null and b/doc/junction.png differ
diff --git a/doc/seed-and-vote.png b/doc/seed-and-vote.png
new file mode 100644
index 0000000..7a59415
Binary files /dev/null and b/doc/seed-and-vote.png differ
diff --git a/doc/voom_mean_variance.png b/doc/voom_mean_variance.png
new file mode 100644
index 0000000..f3b4302
Binary files /dev/null and b/doc/voom_mean_variance.png differ
diff --git a/src/HelperFunctions.c b/src/HelperFunctions.c
index 0549b70..0ceea7b 100644
--- a/src/HelperFunctions.c
+++ b/src/HelperFunctions.c
@@ -23,11 +23,11 @@
 #include "subread.h"
 #include "HelperFunctions.h"
 
-int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_Points, unsigned short * Section_Length)
+int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
 {
 	unsigned int tmp_int=0;
 	int cigar_cursor=0;
-	unsigned short read_cursor=0;
+	unsigned short current_section_chro_len=0, current_section_start_read_pos = 0, read_cursor = 0;
 	unsigned int chromosome_cursor=0;
 	int ret=0;
 
@@ -41,28 +41,32 @@ int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_P
 		}
 		else
 		{
-			if(ch == 'M' || ch == 'D')
-			{
+			if(ch == 'S')
+				read_cursor += tmp_int;
+			else if(ch == 'M') {
 				read_cursor += tmp_int;
+				current_section_chro_len += tmp_int;
 				chromosome_cursor += tmp_int;
-			}
-			else if(ch == 'N' || ch == 0)
-			{
-				if(ret <6)
+			} else if(ch == 'N' || ch == 'D' || ch=='I' || ch == 0) {
+				if('N' == ch)(*is_junction_read)=1;
+				if(ret < FC_CIGAR_PARSER_ITEMS)
 				{
-					if(read_cursor>0)
+					if(current_section_chro_len>0)
 					{
-						Staring_Points[ret] = chromosome_cursor - read_cursor;
-						Section_Length[ret] = read_cursor;
+						Section_Start_Chro_Pos[ret] = chromosome_cursor - current_section_chro_len;
+						Section_Start_Read_Pos[ret] = current_section_start_read_pos;
+						Section_Chro_Length[ret] = current_section_chro_len;
 						ret ++;
 					}
 				}
-				read_cursor = 0;
+				current_section_chro_len = 0;
+				if(ch == 'I') read_cursor += tmp_int;
+				else if(ch == 'N' || ch == 'D') chromosome_cursor += tmp_int;
+				current_section_start_read_pos = read_cursor;
 
-				if(ch == 'N') chromosome_cursor += tmp_int;
-				else break;
+				if(ch == 0) break;
 			}
-			//printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, read_cursor);
+			//printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, current_section_chro_len);
 			tmp_int = 0;
 		}
 		if(cigar_cursor>100) return -1;
@@ -73,15 +77,18 @@ int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_P
 
 void display_sections(char * CIGAR_Str)
 {
-	unsigned int Staring_Points[6];
-	unsigned short Section_Length[6];
-	int retv = RSubread_parse_CIGAR_string(CIGAR_Str, Staring_Points, Section_Length);
+	int is_junc=0;
+	int Section_Start_Chro_Pos[FC_CIGAR_PARSER_ITEMS];
+	unsigned short Section_Start_Read_Pos[FC_CIGAR_PARSER_ITEMS];
+	unsigned short Section_Chro_Length[FC_CIGAR_PARSER_ITEMS];
+
+	int retv = RSubread_parse_CIGAR_string(CIGAR_Str, Section_Start_Chro_Pos, Section_Start_Read_Pos, Section_Chro_Length, &is_junc);
 
 	int x1;
 	SUBREADprintf("Cigar=%s ; Sections=%d\n", CIGAR_Str, retv);
 	for(x1=0; x1<retv; x1++)
 	{
-		SUBREADprintf("   Section #%d: offset=%u  length=%u\n",x1, Staring_Points[x1], Section_Length[x1]);
+		SUBREADprintf("   Section #%d: chro_offset=%d, read_offset=%u  length=%u\n",x1, Section_Start_Chro_Pos[x1], Section_Start_Read_Pos[x1], Section_Chro_Length[x1]);
 	}
 	SUBREADprintf("\n");
 	
@@ -331,7 +338,7 @@ void main()
 void testi_helper_1_main()
 #endif
 {
-	hpl_test2_func();
+	hpl_test1_func();
 }
 
 char *str_replace(char *orig, char *rep, char *with) {
diff --git a/src/HelperFunctions.h b/src/HelperFunctions.h
index 5602abe..254627a 100644
--- a/src/HelperFunctions.h
+++ b/src/HelperFunctions.h
@@ -29,7 +29,7 @@
 
 // This function returns the number of sections found in the CIGAR string. It returns -1 if the CIGAR string cannot be parsed.
 
-int RSubread_parse_CIGAR_string(const char * CIGAR_Str, unsigned int * Staring_Points, unsigned short * Section_Length);
+int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read);
 
 
 // This function try to find the attribute value of a given attribute name from the extra column string in GTF/GFF.
diff --git a/src/core-junction.c b/src/core-junction.c
index 97ccbf3..0bba398 100644
--- a/src/core-junction.c
+++ b/src/core-junction.c
@@ -1854,8 +1854,8 @@ int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t *
 				sprintf(piece_cigar+strlen(piece_cigar), "%d%c", abs(event_after->indel_length), event_after->indel_length>0?'D':'I');
 			else if(event_after -> event_type == CHRO_EVENT_TYPE_JUNCTION||event_after -> event_type == CHRO_EVENT_TYPE_FUSION)
 			{
-				char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
-				if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
+				//char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
+				//if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
 
 				// the distance in CIGAR is the NEXT UNWANTED BASE of piece#1 to the FIRST WANTED BASE in piece#2
 				int delta_one ;
@@ -1880,8 +1880,24 @@ int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t *
 							delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
 					}
 				}
+
+                                                char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
+                                                long long int movement = event_after -> event_large_side;
+                                                movement -= event_after -> event_small_side - delta_one;
+                                                if(1){
+                                                        if(jump_mode == 'B' && movement < 0){
+                                                                movement = - movement;
+                                                                jump_mode = 'N';
+                                                        }else if(jump_mode == 'N' && movement < 0){
+                                                                movement = - movement;
+                                                                jump_mode = 'B';
+                                                        }
+                                                }
+
+                                                if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
+
 				
-				sprintf(piece_cigar+strlen(piece_cigar), "%u%c", event_after -> event_large_side - event_after -> event_small_side + delta_one, jump_mode);
+				sprintf(piece_cigar+strlen(piece_cigar), "%u%c", (int)movement, jump_mode);
 				if(event_after -> indel_at_junction) sprintf(piece_cigar+strlen(piece_cigar), "%dI", event_after -> indel_at_junction);
 				is_junction_read ++;
 			}
@@ -1928,7 +1944,7 @@ int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t *
 
 	//#warning " ========== COMMENT THIS LINE !! ========="
 	//if(explain_context -> pair_number == 999999)
-	if(0 && memcmp(explain_context -> read_name, TTTSNAME, 26)==0)
+	if(0 && memcmp(explain_context -> read_name, "H7TVLADXX140423:2:1112:17883:23072", 32)==0)
 		printf("%s : POS=%u\tCIGAR=%s\tMM=%d > %d?\tVOTE=%d > %0.2f x %d ?\tQUAL=%d\tBRNO=%d\n", explain_context -> read_name, final_position , tmp_cigar, mismatch_bases, applied_mismatch,  result -> selected_votes, global_context -> config.minimum_exonic_subread_fraction,result-> used_subreads_in_vote, final_qual, explain_context -> best_read_id);
 
 	if(mismatch_bases <= applied_mismatch && is_exonic_read_fraction_OK)
diff --git a/src/coverage_calc.c b/src/coverage_calc.c
index 1495758..fbee345 100644
--- a/src/coverage_calc.c
+++ b/src/coverage_calc.c
@@ -10,7 +10,7 @@
 #include "input-files.h" 
 #include "hashtable.h"
 
-#define COVERAGE_MAX_INT 254
+#define COVERAGE_MAX_INT 0x7ffffff0 
 unsigned long long all_counted;
 typedef unsigned int coverage_bin_entry_t;
 int is_BAM_input = 0;
@@ -28,9 +28,10 @@ static struct option cov_calc_long_options[] =
 
 void calcCount_usage()
 {
-	SUBREADprintf("\ncoveageCount v%s\n\n", SUBREAD_VERSION);
-	SUBREADprintf("Counting the coverage of mapped reads at each location on the entire reference genome.\n\n");
-	SUBREADprintf("./ncoveageCount -i <sam_bam_input> -o <output_prefix>\n\n");
+	SUBREADprintf("\ncoverageCount v%s\n\n", SUBREAD_VERSION);
+	SUBREADprintf("This utility program counts the coverage of mapped reads at each location on the entire reference genome. It generates a number of binary files, each corresponding to a chromosome that is listed on the header of the input SAM or BAM file. Each of the binary file consists of many 4-byte integers (little-endian order), indicating the number of reads spanning each location on the corresponded chromosome; the file offset in bytes is calculated by the chromosomal location (zer [...]
+	SUBREADprintf("./coverageCount -i <sam or bam file> -o <output_prefix>\n\n");
+	SUBREADputs("");
 }
 
 void add_chro(char *sam_h)
@@ -126,10 +127,11 @@ int covCalc()
 		}
 		else
 		{
-			unsigned int Staring_Points[6];
-			unsigned short Section_Lengths[6];
+			int Staring_Points[FC_CIGAR_PARSER_ITEMS];
+			unsigned short Staring_Read_Points[FC_CIGAR_PARSER_ITEMS];
+			unsigned short Section_Lengths[FC_CIGAR_PARSER_ITEMS];
 	
-			int flags=0, x1;
+			int flags=0, x1, is_junc = 0;
 			char cigar_str[200];
 			char chro[200];
 			unsigned int pos = 0;
@@ -148,7 +150,7 @@ int covCalc()
 
 			coverage_bin_entry_t * chrbin = (coverage_bin_entry_t*) bin_entry[0];
 			unsigned int chrlen = (void *)( bin_entry[0]) - NULL;
-			int cigar_sections = RSubread_parse_CIGAR_string(cigar_str, Staring_Points, Section_Lengths);
+			int cigar_sections = RSubread_parse_CIGAR_string(cigar_str, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
 			for(x1 = 0; x1 < cigar_sections; x1++)
 			{
 				unsigned int x2;
diff --git a/src/makefile.version b/src/makefile.version
index 5bcf28f..5c6b909 100644
--- a/src/makefile.version
+++ b/src/makefile.version
@@ -1,3 +1,3 @@
-SUBREAD_VERSION="1.4.6-p4"
+SUBREAD_VERSION="1.4.6-p5"
 STATIC_MAKE=
 #STATIC_MAKE= -static
diff --git a/src/readSummary.c b/src/readSummary.c
index 25625f9..2112f22 100644
--- a/src/readSummary.c
+++ b/src/readSummary.c
@@ -111,15 +111,22 @@ typedef struct
 	pthread_spinlock_t input_buffer_lock;
 
 
-	short hits_total_length1[MAX_HIT_NUMBER];
-	short hits_total_length2[MAX_HIT_NUMBER];
+	unsigned short hits_read_start_base1[MAX_HIT_NUMBER];
+	unsigned short hits_read_start_base2[MAX_HIT_NUMBER];
+
+	short hits_read_len1[MAX_HIT_NUMBER];
+	short hits_read_len2[MAX_HIT_NUMBER];
+
 	long hits_indices1 [MAX_HIT_NUMBER];
 	long hits_indices2 [MAX_HIT_NUMBER];
 	long decision_table_ids [MAX_HIT_NUMBER];
-	unsigned char decision_table_votes [MAX_HIT_NUMBER];
+	unsigned short decision_table_votes [MAX_HIT_NUMBER];
 	long decision_table_exon_ids [MAX_HIT_NUMBER];
+	char decision_table_read1_used[MAX_HIT_NUMBER];
+	char decision_table_read2_used[MAX_HIT_NUMBER];
 	long uniq_gene_exonid_table [MAX_HIT_NUMBER];
 	long uniq_gene_table [MAX_HIT_NUMBER];
+	char * read_coverage_bits;
 
 	char * chro_name_buff;
 	z_stream * strm_buffer;
@@ -166,6 +173,8 @@ typedef struct
 	int is_stake_warning_shown;
 	int is_split_alignments_only;
 	int is_duplicate_ignored;
+	int is_first_read_reversed;
+	int is_second_read_straight;
 	int do_not_sort;
 	int reduce_5_3_ends_to_one;
 	int isCVersion;
@@ -180,7 +189,9 @@ typedef struct
 	int longest_chro_name;
 	int five_end_extension;
 	int three_end_extension;
-	int overlap_length_required;
+	int fragment_minimum_overlapping;
+	int calculate_overlapping_lengths;
+	int use_overlapping_break_tie;
 
 	unsigned long long int all_reads;
 
@@ -350,14 +361,15 @@ void print_FC_configuration(fc_thread_global_context_t * global_context, char *
 	print_in_box(80,0,0,"Multi-overlapping reads : %s", global_context->is_multi_overlap_allowed?"counted":"not counted");
 	if(global_context -> is_split_alignments_only)
 		print_in_box(80,0,0,"       Split alignments : required");
-	if(global_context -> overlap_length_required !=1)
-		print_in_box(80,0,0,"      Overlapping bases : %d", global_context -> overlap_length_required);
+	if(global_context -> fragment_minimum_overlapping !=1)
+		print_in_box(80,0,0,"      Overlapping bases : %d", global_context -> fragment_minimum_overlapping);
 	if(global_context -> five_end_extension || global_context -> three_end_extension)
 		print_in_box(80,0,0,"        Read extensions : %d on 5' and %d on 3' ends", global_context -> five_end_extension , global_context -> three_end_extension);
 	if(global_context -> reduce_5_3_ends_to_one)
 		print_in_box(80,0,0,"      Read reduction to : %d' end" , global_context -> reduce_5_3_ends_to_one == REDUCE_TO_5_PRIME_END ?5:3);
 	if(global_context -> is_duplicate_ignored)
 		print_in_box(80,0,0,"       Duplicated Reads : ignored");
+	print_in_box(80,0,0,"      Read orientations : %c%c", global_context->is_first_read_reversed?'r':'f', global_context->is_second_read_straight?'f':'r' );
 
 	if(global_context->is_paired_end_mode_assign)
 	{
@@ -984,8 +996,6 @@ void sort_feature_info(fc_thread_global_context_t * global_context, unsigned int
 	free(chro_feature_ptr);
 }
 
-//#define MAX_HIT_NUMBER 1800
-//
 int strcmp_slash(char * s1, char * s2)
 {
 	char nch;
@@ -1090,6 +1100,13 @@ void report_unpair_warning(fc_thread_global_context_t * global_context, fc_threa
 	(*this_noproperly_paired_added) = 1;
 }
 
+
+void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context,
+			long * hits_indices1, unsigned short * hits_read_start_base1, short * hits_read_len1, int nhits1, unsigned short rl1,
+			long * hits_indices2, unsigned short * hits_read_start_base2, short * hits_read_len2, int nhits2, unsigned short rl2,
+			int fixed_fractional_count, char * read_name);
+
+
 void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context)
 {
 
@@ -1098,7 +1115,9 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 	unsigned int search_start = 0, search_end;
 	int nhits1 = 0, nhits2 = 0, alignment_masks, search_block_id, search_item_id;
 	long * hits_indices1 = thread_context -> hits_indices1, * hits_indices2 = thread_context -> hits_indices2;
-	short * hits_total_length1 = thread_context -> hits_total_length1 ,  * hits_total_length2 = thread_context -> hits_total_length2;
+	unsigned short * hits_read_start_base1 = thread_context -> hits_read_start_base1 ,  * hits_read_start_base2 = thread_context -> hits_read_start_base2;
+	short * hits_read_len1 = thread_context -> hits_read_len1, * hits_read_len2 = thread_context -> hits_read_len2;
+	unsigned short cigar_read_len1 = 0, cigar_read_len2 = 0;
 
 	int is_second_read;
 	int maximum_NH_value = 1;
@@ -1386,7 +1405,11 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 		if( global_context -> is_paired_end_mode_assign ){
 			int is_second_read_in_pair = alignment_masks & SAM_FLAG_SECOND_READ_IN_PAIR;
-			is_fragment_negative_strand = is_second_read_in_pair?(!is_this_negative_strand):is_this_negative_strand;
+			//is_fragment_negative_strand = is_second_read_in_pair?(!is_this_negative_strand):is_this_negative_strand;
+			if(is_second_read_in_pair)
+				is_fragment_negative_strand = global_context -> is_second_read_straight?is_this_negative_strand:(!is_this_negative_strand);
+			else
+				is_fragment_negative_strand = global_context -> is_first_read_reversed?(!is_this_negative_strand):is_this_negative_strand;
 		}
 
 		fc_chromosome_index_info * this_chro_info = HashTableGet(global_context -> exontable_chro_table, read_chr);
@@ -1415,21 +1438,25 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 		{
 			int nhits = 0;
 
-			int cigar_section_id, cigar_sections;
-			unsigned int Starting_Points[6];
-			unsigned short Section_Lengths[6];
+			int cigar_section_id, cigar_sections, is_junction_read = 0;
+			int Starting_Chro_Points[FC_CIGAR_PARSER_ITEMS];
+			unsigned short Starting_Read_Points[FC_CIGAR_PARSER_ITEMS];
+			unsigned short Section_Lengths[FC_CIGAR_PARSER_ITEMS];
 			long * hits_indices = (is_second_read?hits_indices2:hits_indices1);
-			short * hits_total_length = (is_second_read?hits_total_length2:hits_total_length1);
+			unsigned short * hits_read_start_base = is_second_read?hits_read_start_base2:hits_read_start_base1;
+			short * hits_read_len = is_second_read?hits_read_len2:hits_read_len1;
+			unsigned short * cigar_read_len = is_second_read?&cigar_read_len2:&cigar_read_len1;
 
-			cigar_sections = RSubread_parse_CIGAR_string(CIGAR_str, Starting_Points, Section_Lengths);
+			cigar_sections = RSubread_parse_CIGAR_string(CIGAR_str, Starting_Chro_Points, Starting_Read_Points, Section_Lengths, &is_junction_read);
 
+			(*cigar_read_len) = Starting_Read_Points[cigar_sections-1] + Section_Lengths[cigar_sections-1];
 
-			if(cigar_sections>1 || !global_context->is_split_alignments_only) 
+			if(is_junction_read || !global_context->is_split_alignments_only) 
 			{
 
 			//#warning "=================== COMMENT THESE 2 LINES ================================"
 			//for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
-			//	SUBREADprintf("BCCC: %llu , sec[%d] %d ~ %d ; secs=%d ; flags=%d ; second=%d\n", read_pos, cigar_section_id , Starting_Points[cigar_section_id], Section_Lengths[cigar_section_id], cigar_sections, alignment_masks, is_second_read);
+			//	SUBREADprintf("BCCC: %llu , sec[%d] %d ~ %d ; secs=%d ; flags=%d ; second=%d\n", read_pos, cigar_section_id , Starting_Chro_Points[cigar_section_id], Section_Lengths[cigar_section_id], cigar_sections, alignment_masks, is_second_read);
 				if(global_context -> reduce_5_3_ends_to_one)
 				{
 					if((REDUCE_TO_5_PRIME_END == global_context -> reduce_5_3_ends_to_one) + is_this_negative_strand == 1) // reduce to 5' end (small coordinate if positive strand / large coordinate if negative strand)
@@ -1438,7 +1465,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 					}
 					else
 					{
-						Starting_Points[0] = Starting_Points[cigar_sections-1] + Section_Lengths[cigar_sections-1] - 1;
+						Starting_Chro_Points[0] = Starting_Chro_Points[cigar_sections-1] + Section_Lengths[cigar_sections-1] - 1;
 						Section_Lengths[0]=1;
 					}
 
@@ -1451,16 +1478,16 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 					if(is_this_negative_strand){
 						Section_Lengths [cigar_sections - 1] += global_context -> five_end_extension;
 					}else{
-						//SUBREADprintf("5-end extension: %d [%d]\n", Starting_Points[0], Section_Lengths[0]);
+						//SUBREADprintf("5-end extension: %d [%d]\n", Starting_Chro_Points[0], Section_Lengths[0]);
 						if( read_pos > global_context -> five_end_extension)
 						{
 							Section_Lengths [0] += global_context -> five_end_extension;
-							read_pos  -= global_context -> five_end_extension;
+							Starting_Chro_Points [0] -= global_context -> five_end_extension;
 						}
 						else
 						{
 							Section_Lengths [0] += read_pos-1;
-							read_pos = 1;
+							Starting_Chro_Points [0] -= read_pos-1;
 						}
 					}
 				}
@@ -1472,12 +1499,12 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 						if( read_pos > global_context -> three_end_extension)
 						{
 							Section_Lengths [0] += global_context -> three_end_extension;
-							read_pos -= global_context -> three_end_extension;
+							Starting_Chro_Points [0] -= global_context -> three_end_extension;
 						}
 						else
 						{
 							Section_Lengths [0] += read_pos - 1;
-							read_pos = 1;
+							Starting_Chro_Points [0] -= read_pos - 1;
 						}
 					}
 					else	Section_Lengths [cigar_sections - 1] += global_context -> three_end_extension;
@@ -1486,19 +1513,14 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 			//#warning "=================== COMMENT THESE 2 LINES ================================"
 			//for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
-			//	SUBREADprintf("ACCC: %llu , sec[%d] %u ~ %d ; secs=%d\n", read_pos, cigar_section_id, Starting_Points[cigar_section_id], Section_Lengths[cigar_section_id], cigar_sections);
-				if( global_context -> debug_command [0] == 'D')
-					SUBREADprintf("\n\nRead name = %s ; Second read = %d\n", read_name, is_second_read);
+			//	SUBREADprintf("ACCC: %llu , sec[%d] %u ~ %d ; secs=%d\n", read_pos, cigar_section_id, Starting_Chro_Points[cigar_section_id], Section_Lengths[cigar_section_id], cigar_sections);
 
 				for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
 				{
-					long section_begin_pos = read_pos + Starting_Points[cigar_section_id];
+					long section_begin_pos = read_pos + Starting_Chro_Points[cigar_section_id];
 					long section_end_pos = Section_Lengths[cigar_section_id] + section_begin_pos - 1;
 
 					
-					if(global_context -> debug_command [0] == 'D')
-						SUBREADprintf("Section [%d]: %d ~ %d\n", cigar_section_id , Starting_Points[cigar_section_id], Starting_Points[cigar_section_id]+Section_Lengths[cigar_section_id]);
-
 					int start_reverse_table_index = section_begin_pos / REVERSE_TABLE_BUCKET_LENGTH;
 					int end_reverse_table_index = (1+section_end_pos) / REVERSE_TABLE_BUCKET_LENGTH;
 
@@ -1524,6 +1546,9 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 						int search_item_start = 0, search_item_end = global_context -> exontable_block_end_index[search_block_id];
 						if(search_block_id>0)search_item_start = global_context -> exontable_block_end_index[search_block_id-1];
 
+						// search_item_id is the inner number of the exons.
+						// the exontables in global_index has search_item_id as the index.
+
 						for(search_item_id = search_item_start ; search_item_id < search_item_end; search_item_id++)
 						{
 							if (global_context -> exontable_stop[search_item_id] >= section_begin_pos)
@@ -1547,11 +1572,12 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 									{
 										hits_indices[nhits] = search_item_id;
 
-										if(global_context -> overlap_length_required !=1)
+										if(global_context -> calculate_overlapping_lengths)
 										{
 											int section_overlapped = min(global_context -> exontable_stop[search_item_id] , section_end_pos) 
-												- max(global_context -> exontable_start[search_item_id] , section_begin_pos) + 1;
-											hits_total_length[nhits] = (short)section_overlapped;
+													       - max(global_context -> exontable_start[search_item_id] , section_begin_pos) + 1;
+											hits_read_len[nhits] = (short)section_overlapped;
+											hits_read_start_base[nhits] = (section_begin_pos > global_context -> exontable_start[search_item_id]?(Starting_Read_Points[cigar_section_id]):(Starting_Read_Points[cigar_section_id] + (global_context -> exontable_start[search_item_id] - section_begin_pos)));
 										}
 
 										nhits++;
@@ -1562,9 +1588,8 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 						}
 					}
 				}
-			}
-			else{
-				if(global_context->is_split_alignments_only)
+			} else {
+				if(global_context->is_split_alignments_only) // must be true.
 				{
 					skipped_for_exonic ++;
 					if((is_second_read && skipped_for_exonic == 2) || (!global_context -> is_paired_end_mode_assign) || (alignment_masks & 0x8))
@@ -1590,60 +1615,57 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 	}	// loop for is_second_read
 
 
-	if(global_context -> overlap_length_required !=1)
-	{
-		// merge feature : if a read overlaps with a feature twice or more times (by >=2 segments), the total length of the overlapped bases is calculated and the features with insufficient overlapped bases are removed.
-		// both meta-feature mode and feature mode use the same strategy.
-		// two ends in a fragment is considered individually; the overlapping bases are not added up.
-		int ends;
-		for(ends = 0; ends < global_context -> is_paired_end_mode_assign + 1; ends++)
-		{
-			long * hits_indices = ends?hits_indices2:hits_indices1;
-			short * hits_total_length = ends?hits_total_length2:hits_total_length1;
-			int  nhits = ends?nhits2:nhits1;
-			int x1;
+	int fixed_fractional_count = global_context -> use_fraction_multi_mapping ?calc_fixed_fraction(maximum_NH_value): NH_FRACTION_INT;
 
-			// calculating the summed lengths of overlapping exons
-			for(x1=0; x1<nhits;x1++)
-			{
-				long exon_no = hits_indices[x1];
-				if(exon_no>=0x7fffffff) continue;	//already removed
-				int x2;
-				for(x2=x1+1; x2<nhits; x2++)
-				{
-					if(hits_indices[x2]==exon_no)
-					{
-						hits_total_length[x1]+=hits_total_length[x2];
-						hits_indices[x2]=0x7fffffff;
-					}
-				}
+	// we have hits_indices1 and hits_indices2 and nhits1 and nhits2 here
+	// we also have fixed_fractional_count which is the value to add
 
-				if( global_context -> debug_command [0] == 'D')
-					SUBREADprintf("HIT[%d]: OVERLAP=%d\n", x1, hits_total_length[x1]);
-				if(hits_total_length[x1]< global_context -> overlap_length_required)
-					hits_indices[x1]=0x7fffffff;
-			}
+	vote_and_add_count(global_context, thread_context,
+			   hits_indices1, hits_read_start_base1, hits_read_len1, nhits1, cigar_read_len1,
+			   hits_indices2, hits_read_start_base2, hits_read_len2, nhits2, cigar_read_len2,
+			   fixed_fractional_count, read_name);
+	return;
+}
 
-			// remove the exons in the hits table when it is marked as removed (0x7fffffff)
-			int new_hits=0;
-			for(x1 = 0; x1< nhits; x1++)
-			{
-				if(hits_indices[x1]>=0x7fffffff) continue;
-				if(new_hits != x1)
-				{
-					hits_indices[new_hits]=hits_indices[x1];
-					hits_total_length[new_hits]=hits_total_length[x1];		
-				}
-				new_hits++;
-			}
-			if(ends) nhits2 = new_hits;
-			else nhits1 = new_hits;
+void add_bitmap_overlapping(char * x1_bitmap, short start_base, short len){
+	int x1;
+	int rl16 = start_base+len-16;
+	for(x1 = start_base; x1 < start_base+len; x1++){
+		int bit = x1 % 8;
+		int byte = x1 / 8;
+		if(bit == 0 && x1 < rl16){
+			x1_bitmap[byte]=-1;
+			x1_bitmap[byte+1]=-1;
+			x1+=15;
+		}else{
+			x1_bitmap[byte] |= (1<<bit);
 		}
 	}
+}
 
-	int fixed_fractional_count = global_context -> use_fraction_multi_mapping ?calc_fixed_fraction(maximum_NH_value): NH_FRACTION_INT;
+int count_bitmap_overlapping(char * x1_bitmap, unsigned short rl){
+
+	int x1;
+	int ret = 0;
+	for(x1 = 0; x1 < rl; x1++){
+		int byte = x1 / 8;
+		int bit = x1 % 8;
+
+		if(bit == 0 && x1_bitmap[byte]==-1){
+			x1 += 7;
+			ret += 8;
+		}else if(x1_bitmap[byte] &  (1<<bit)) ret ++;
+	}
+	return ret;
+}
 
-	if(nhits2+nhits1==1)
+
+void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context,
+			long * hits_indices1, unsigned short * hits_read_start_base1, short * hits_read_len1, int nhits1, unsigned short rl1,
+			long * hits_indices2, unsigned short * hits_read_start_base2, short * hits_read_len2, int nhits2, unsigned short rl2,
+			int fixed_fractional_count, char * read_name)
+{
+	if(global_context -> calculate_overlapping_lengths == 0 && nhits2+nhits1==1)
 	{
 		long hit_exon_id = nhits2?hits_indices2[0]:hits_indices1[0];
 		thread_context->count_table[hit_exon_id] += fixed_fractional_count;
@@ -1656,7 +1678,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 		}
 		thread_context->read_counters.assigned_reads ++;
 	}
-	else if(nhits2 == 1 && nhits1 == 1 && hits_indices2[0]==hits_indices1[0])
+	else if(global_context -> calculate_overlapping_lengths == 0 && nhits2 == 1 && nhits1 == 1 && hits_indices2[0]==hits_indices1[0])
 	{
 		long hit_exon_id = hits_indices1[0];
 		thread_context->count_table[hit_exon_id] += fixed_fractional_count;
@@ -1671,205 +1693,241 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 	}
 	else
 	{
+		int ends;
+		// Build a voting table.
+		// The voting table should be:
+		//      total_length [nhit_final] = total_length_overlapping
+		//      final_id [nhit_final] = final_exon_id
 
-		if(nhits2+nhits1>=MAX_HIT_NUMBER)
-		{
-			print_in_box(80,0,0,"A %s overlapped with %d features.", global_context -> is_paired_end_mode_assign?"fragment":"read", nhits2+nhits1);
-			print_in_box(80,0,0,"Please increase MAX_HIT_NUMBER in the program");
-		}
+		// if is_gene_leven, then decision_table_exon_ids[nhit_final] is the exon id where the count is added.
 
-		long * decision_table_ids = thread_context -> decision_table_ids;
-		unsigned char * decision_table_votes = thread_context ->decision_table_votes;
-		long * decision_table_exon_ids = thread_context -> decision_table_exon_ids;
-		int decision_table_items = 0, xk1, xk2;
+		// After all, the count is added to all hits where total_length has the maximum value.
+		// If there are more than one locations having the same total_length, then the fragment is ambiguous. 
+		// Count is added when "-O" is specified.
 
-		for(is_second_read = 0; is_second_read < 2; is_second_read++)
+		// merge feature : if a read overlaps with an EXON twice or more times (by >=2 segments in cigar),
+		//                 then the total length of the overlapped bases is calculated.
+		// 
+		// two ends in a fragment is considered individually; the overlapping bases are not added up.
+		//
+		char * read_coverage_bits = thread_context -> read_coverage_bits;
+
+		for(ends = 0; ends < global_context -> is_paired_end_mode_assign + 1; ends++)
 		{
-			if(is_second_read && !global_context -> is_paired_end_mode_assign) break;
-			long * hits_indices = is_second_read?hits_indices2:hits_indices1;
-			int nhits = is_second_read?nhits2:nhits1;
-			if (nhits<1) continue;
-			if(global_context -> is_gene_level)
+			long * hits_indices = ends?hits_indices2:hits_indices1;
+			unsigned short * hits_read_start_base = ends?hits_read_start_base2:hits_read_start_base1;
+			short * hits_read_len = ends?hits_read_len2:hits_read_len1;
+			int  nhits = ends?nhits2:nhits1;
+			int x1;
+
+			if(read_coverage_bits)
 			{
-				long * uniq_gene_table = thread_context -> uniq_gene_table;
-				long * uniq_gene_exonid_table = thread_context -> uniq_gene_exonid_table;
-				int uniq_genes = 0;
-				for(xk1=0;xk1<nhits;xk1++)
-				{
-					int gene_id = global_context -> exontable_geneid[hits_indices[xk1]];
-					int is_unique = 1;
-					for(xk2=0; xk2<uniq_genes; xk2++)
-					{
-						if(gene_id == uniq_gene_table[xk2])
-						{
-							is_unique = 0;
-							break;
-						}
-					}
-					if(is_unique){
-						uniq_gene_exonid_table[uniq_genes] = hits_indices[xk1];
-						uniq_gene_table[uniq_genes++] = gene_id;
-						if(uniq_genes >= MAX_HIT_NUMBER) break;
-					}
-				}
+				int covergae_bitmap_size = max(rl1,rl2)/8+2; 
+				for(x1 = 0; x1 < nhits; x1++)
+					memset(read_coverage_bits +  x1 * (MAX_READ_LENGTH / 8 + 1), 0, covergae_bitmap_size);
+			}
+
+			// calculating the summed lengths of overlapping exons
+			for(x1=0; x1<nhits;x1++)
+			{
+				long exon_no = hits_indices[x1];
+				if(exon_no>=0x7fffffff) continue;	//already removed
+
+				char * x1_bitmap = read_coverage_bits + (MAX_READ_LENGTH /8 +1) * x1;
+
+				//if(FIXLENstrcmp("V0112_0155:7:1308:19321:196983", read_name)==0)
+				//	SUBREADprintf("CREATE bitmap: for x1 = %d, on read %d len = %d \n", x1, hits_read_start_base[x1], hits_read_len[x1]);
 
-				for(xk1=0;xk1<uniq_genes; xk1++)
+				if(read_coverage_bits)
+					add_bitmap_overlapping(x1_bitmap, hits_read_start_base[x1], hits_read_len[x1]);
+
+				long merge_key = global_context -> is_gene_level? global_context -> exontable_geneid[exon_no] : exon_no;
+				int x2;
+				for(x2=x1+1; x2<nhits; x2++)
 				{
-					long gene_id = uniq_gene_table[xk1];
-					int is_fresh = 1;
-					if(decision_table_items >= MAX_HIT_NUMBER) break;
-					for(xk2=0; xk2<decision_table_items; xk2++)
+					long tomerge_exon_no = hits_indices[x2];
+					if(tomerge_exon_no >=0x7fffffff) continue;
+
+					long tomerge_key = global_context -> is_gene_level? global_context -> exontable_geneid[tomerge_exon_no] : tomerge_exon_no;
+					if(tomerge_key==merge_key)
 					{
-						if(gene_id == decision_table_ids[xk2])
-						{
-							decision_table_votes[xk2]++;
-							is_fresh = 0;
-							break;
-						}
+						if(read_coverage_bits)
+							add_bitmap_overlapping(x1_bitmap, hits_read_start_base[x2], hits_read_len[x2]);
+
+						//if(FIXLENstrcmp("V0112_0155:7:1308:19321:196983", read_name)==0)
+						//	SUBREADprintf("APPEND bitmap: for x1 = %d, on read %d len = %d \n", x1, hits_read_start_base[x2], hits_read_len[x2]);
+
+						//TODO: change this part to uniquely-overlapping length. Not simply adding.
+						//hits_read_start_base[x1]+=hits_read_start_base[x2];
+						hits_indices[x2]=0x7fffffff;
 						
 					}
-					if(is_fresh)
-					{
-						decision_table_votes[decision_table_items] = 1;
-						decision_table_exon_ids[decision_table_items] = uniq_gene_exonid_table[xk1];
-						decision_table_ids[decision_table_items++] = gene_id;
-					}
 				}
 			}
-			else
+
+			// remove the exons in the hits table when it is marked as removed (0x7fffffff)
+			int new_hits=0;
+			for(x1 = 0; x1< nhits; x1++)
 			{
-				for(xk1=0;xk1<nhits;xk1++)
+				if(hits_indices[x1]>=0x7fffffff) continue;
+
+				if(new_hits != x1)
+					hits_indices[new_hits]=hits_indices[x1];
+				
+				if(global_context -> calculate_overlapping_lengths)
 				{
-					long exon_id = hits_indices[xk1];
-					//long exon_id = global_context -> exontable_geneid[hits_indices[xk1]];
-					int is_fresh = 1;
-					if(decision_table_items >= MAX_HIT_NUMBER) break;
-					for(xk2=0; xk2<decision_table_items; xk2++)
-					{
-						if(exon_id == decision_table_ids[xk2])
-						{
-							decision_table_votes[xk2]++;
-							is_fresh = 0;
-							break;
-						}
-					}
-					if(is_fresh)
+					char * x1_bitmap = read_coverage_bits + (MAX_READ_LENGTH /8 +1) * x1;
+					hits_read_len[new_hits]=count_bitmap_overlapping(x1_bitmap, ends?rl2:rl1);//hits_read_len[x1];
+				}
+
+				new_hits++;
+			}
+			if(ends) nhits2 = new_hits;
+			else nhits1 = new_hits;
+		}
+
+		unsigned short * decision_total_lengths = thread_context ->decision_table_votes;
+		int decision_number = 0, decision_table_no;
+		long * decision_table_exon_ids = thread_context -> decision_table_exon_ids;
+		long * decision_table_keys = thread_context -> decision_table_ids;
+		char * read1_used =  thread_context -> decision_table_read1_used;
+		char * read2_used =  thread_context -> decision_table_read2_used;
+
+		for(ends =0 ; ends < global_context -> is_paired_end_mode_assign + 1 ; ends++){
+			int nhits = ends?nhits2:nhits1;
+			short * hits_read_len = ends?hits_read_len2:hits_read_len1;
+			long * hits_indices = ends?hits_indices2:hits_indices1;
+			int hit_x1;
+			for(hit_x1 = 0; hit_x1 < nhits ; hit_x1++){
+				int decision_i;
+				long decision_key; // gene_id if is_gene_level, or exon_id.
+				long decision_exon_id;
+				if (global_context -> is_gene_level ){
+					decision_exon_id = hits_indices[hit_x1];
+					decision_key = global_context -> exontable_geneid[decision_exon_id];
+				}else
+					decision_exon_id = decision_key = hits_indices[hit_x1];
+
+				// get decision_table_no from decision_key
+				// add into decision_tables if decision_key is unfound.
+				decision_table_no = -1;
+				for(decision_i = 0; decision_i < decision_number ; decision_i ++){
+					if(decision_key == decision_table_keys[decision_i])
 					{
-						decision_table_votes[decision_table_items] = 1;
-						decision_table_ids[decision_table_items++] = exon_id;
+						decision_table_no = decision_i;
+						break;
 					}
+				}
+
+				if(decision_table_no<0){
+					decision_table_exon_ids[decision_number] = decision_exon_id;
+					decision_table_keys[decision_number] = decision_key;
+					decision_total_lengths[decision_number] = 0;
+					read1_used[decision_number]=0;
+					read2_used[decision_number]=0;
 
+					decision_table_no = decision_number;
+					decision_number ++;
 				}
-			}
 
-		}
+				if(ends)read2_used[decision_table_no] = 1;
+				else read1_used[decision_table_no] = 1;
 
-		/*
-		printf("R has %d hits\n", decision_table_items);
-		int i;
-		for(i=0; i<decision_table_items; i++)
-		{
-			long geneid = global_context -> exontable_geneid[decision_table_ids[i]] ;
-			printf("%ld : %ld,   ",decision_table_ids[i], geneid);
-		}
-		puts("");
+				assert(read2_used[decision_table_no]<2);
+				assert(read1_used[decision_table_no]<2);
 
-		*/
+				if(global_context -> calculate_overlapping_lengths)
+					decision_total_lengths[decision_table_no] += hits_read_len[hit_x1];
 
-		if(decision_table_items>0)
-		{
-			int max_votes = 0;
-			int top_voters = 0;
-			long top_voter_id = 0;
+			}
+		}
 
-			for(xk1 = 0; xk1 < decision_table_items; xk1++)
+		int maximum_decision_score = 0;
+		int maximum_total_count = 0;
+		int maximum_decision_no = 0;
+		for(decision_table_no = 0; decision_table_no < decision_number; decision_table_no++)
+		{
+			if(global_context -> fragment_minimum_overlapping == 1 || decision_total_lengths[decision_table_no] >= global_context -> fragment_minimum_overlapping)
 			{
-				if(decision_table_votes[xk1] > max_votes)
-				{
-					max_votes = decision_table_votes[xk1];
-					top_voters = 1;
-					top_voter_id = (global_context -> is_gene_level)?decision_table_exon_ids[xk1]:decision_table_ids[xk1];
-				}
-				else
-					if(decision_table_votes[xk1] == max_votes) top_voters++;
+				int this_decision_score = global_context -> use_overlapping_break_tie? decision_total_lengths[decision_table_no] :(read1_used[decision_table_no] + read2_used[decision_table_no]);
+				if(!global_context -> use_overlapping_break_tie)assert(this_decision_score<3);
+				//SUBREADprintf("%s LEN[%d] = %d , score=%d\n", read_name, decision_table_no ,  decision_total_lengths[decision_table_no], this_decision_score );
+				if(maximum_decision_score < this_decision_score){
+					maximum_total_count = 1;
+					maximum_decision_no = decision_table_no;
+					maximum_decision_score = this_decision_score;
+				}else if(maximum_decision_score == this_decision_score)
+					maximum_total_count ++;
 			}
+		}
 
-			if(top_voters == 1 && (!global_context -> is_multi_overlap_allowed))
-			{
-				thread_context->count_table[top_voter_id] += fixed_fractional_count;
+		if(maximum_total_count == 0){
+			if(global_context -> SAM_output_fp)
+				fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_NoFeatures\t*\t*\n", read_name);
+
+			thread_context->read_counters.unassigned_nofeatures ++;
+		}else{
+
+			// final adding votes.
+			if(1 == maximum_total_count && !global_context -> is_multi_overlap_allowed) {
+				// simple add to the exon ( EXON_ID = decision_table_exon_ids[maximum_decision_no])
+				long max_exon_id = decision_table_exon_ids[maximum_decision_no];
+				thread_context->count_table[max_exon_id] += fixed_fractional_count;
 				thread_context->nreads_mapped_to_exon++;
 				if(global_context -> SAM_output_fp)
 				{
-					int final_gene_number = global_context -> exontable_geneid[top_voter_id];
+					int final_gene_number = global_context -> exontable_geneid[max_exon_id];
 					unsigned char * final_feture_name = global_context -> gene_name_array[final_gene_number];
-					if(decision_table_items>1)
-						// assigned by read-voting
-						fprintf(global_context -> SAM_output_fp,"%s\tAssigned\t%s\tVotes/Targets=%d/%d\n", read_name, final_feture_name, max_votes, decision_table_items);
+					if(decision_number>1)
+						fprintf(global_context -> SAM_output_fp,"%s\tAssigned\t%s\t%s/Targets=%d/%d\n", read_name, final_feture_name, global_context -> use_overlapping_break_tie? "MaximumOverlapping":"Votes", maximum_decision_score, decision_number);
 					else
 						fprintf(global_context -> SAM_output_fp,"%s\tAssigned\t%s\t*\n", read_name, final_feture_name);
 				}
 				thread_context->read_counters.assigned_reads ++;
 
-			}
-			else if(top_voters >1 || (global_context -> is_multi_overlap_allowed))
-			{
-				if(global_context -> is_multi_overlap_allowed)
+			}else if(global_context -> is_multi_overlap_allowed) {
+				char final_feture_names[1000];
+				int assigned_no = 0, xk1;
+				final_feture_names[0]=0;
+				for(xk1 = 0; xk1 < decision_number; xk1++)
 				{
-					char final_feture_names[1000];
-					int assigned_no = 0;
-					final_feture_names[0]=0;
-					for(xk1 = 0; xk1 < decision_table_items; xk1++)
-					{
-						//if(decision_table_votes[xk1] == max_votes)
-						if(decision_table_votes[xk1] >= 1)
-						{
-							long tmp_voter_id = (global_context -> is_gene_level)?decision_table_exon_ids[xk1]:decision_table_ids[xk1];
-							//printf("TVID=%ld; HITID=%d\n", tmp_voter_id, xk1);
-							thread_context->count_table[tmp_voter_id] += fixed_fractional_count;
+					long tmp_voter_id = decision_table_exon_ids[xk1];
+					thread_context->count_table[tmp_voter_id] += fixed_fractional_count;
 
-							if(global_context -> SAM_output_fp)
-							{
-								if(strlen(final_feture_names)<700) 
-								{
-										int final_gene_number = global_context -> exontable_geneid[tmp_voter_id];
-										unsigned char * final_feture_name = global_context -> gene_name_array[final_gene_number];
-										strncat(final_feture_names, (char *)final_feture_name, 999);
-										strncat(final_feture_names, ",", 999);
-										assigned_no++;
-								}
-							}
-						}
-					}
-					final_feture_names[999]=0;
-					thread_context->nreads_mapped_to_exon++;
 					if(global_context -> SAM_output_fp)
 					{
-						int ffnn = strlen(final_feture_names);
-						if(ffnn>0) final_feture_names[ffnn-1]=0;
-						// overlapped but still assigned 
-						fprintf(global_context -> SAM_output_fp,"%s\tAssigned\t%s\tTotal=%d\n", read_name, final_feture_names, assigned_no);
+						if(strlen(final_feture_names)<700)
+						{
+							int final_gene_number = global_context -> exontable_geneid[tmp_voter_id];
+							unsigned char * final_feture_name = global_context -> gene_name_array[final_gene_number];
+							strncat(final_feture_names, (char *)final_feture_name, 999);
+							strncat(final_feture_names, ",", 999);
+							assigned_no++;
+						}
 					}
-					thread_context->read_counters.assigned_reads ++;
-
 				}
-				else{
-					if(global_context -> SAM_output_fp)
-						fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Ambiguit\t*\tNumber_Of_Overlapped_Genes=%d\n", read_name, top_voters);
-
-					thread_context->read_counters.unassigned_ambiguous ++;
+				final_feture_names[999]=0;
+				thread_context->nreads_mapped_to_exon++;
+				if(global_context -> SAM_output_fp)
+				{
+					int ffnn = strlen(final_feture_names);
+					if(ffnn>0) final_feture_names[ffnn-1]=0;
+					// overlapped but still assigned 
+					fprintf(global_context -> SAM_output_fp,"%s\tAssigned\t%s\tTotal=%d\n", read_name, final_feture_names, assigned_no);
 				}
-			}
-		}
-		else{
-			if(global_context -> SAM_output_fp)
-				fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_NoFeatures\t*\t*\n", read_name);
+				thread_context->read_counters.assigned_reads ++;
 
-			thread_context->read_counters.unassigned_nofeatures ++;
+			} else {
+				if(global_context -> SAM_output_fp)
+					fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Ambiguit\t*\tNumber_Of_Overlapped_Genes=%d\n", read_name, maximum_total_count);
+
+				thread_context->read_counters.unassigned_ambiguous ++;
+			}
 		}
 	}
-
 }
 
+
 void * feature_count_worker(void * vargs)
 {
 	void ** args = (void **) vargs;
@@ -2147,7 +2205,7 @@ HashTable * load_alias_table(char * fname)
 	return ret;
 }
 
-void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int i [...]
+void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int i [...]
 {
 
 	global_context -> input_buffer_max_size = buffer_size;
@@ -2168,6 +2226,9 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
 	global_context -> is_multi_mapping_allowed = is_multi_mapping_allowed;
 	global_context -> is_split_alignments_only = is_split_alignments_only;
 	global_context -> is_duplicate_ignored = is_duplicate_ignored;
+	global_context -> is_first_read_reversed = (pair_orientations[0]=='r');
+	global_context -> is_second_read_straight = (pair_orientations[1]=='f');
+
 	global_context -> reduce_5_3_ends_to_one = reduce_5_3_ends_to_one;
 	global_context -> do_not_sort = is_not_sort;
 	global_context -> is_SAM_file = is_SAM;
@@ -2184,7 +2245,9 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
 	global_context -> feature_block_size = feature_block_size;
 	global_context -> five_end_extension = fiveEndExtension;
 	global_context -> three_end_extension = threeEndExtension;
-	global_context -> overlap_length_required = minReadOverlap;
+	global_context -> fragment_minimum_overlapping = minFragmentOverlap;
+	global_context -> use_overlapping_break_tie = useOverlappingBreakTie;
+	global_context -> calculate_overlapping_lengths = (global_context -> fragment_minimum_overlapping > 1) || global_context -> use_overlapping_break_tie;
 	global_context -> debug_command = debug_command;
 
 	global_context -> read_counters.unassigned_ambiguous=0;
@@ -2228,9 +2291,18 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
 
 	if(global_context -> is_read_details_out)
 	{
-		char tmp_fname[350];
+		char tmp_fname[350], *modified_fname;
+		int i=0;
 		sprintf(tmp_fname, "%s.featureCounts", global_context -> raw_input_file_name);
-		global_context -> SAM_output_fp = f_subr_open(tmp_fname, "w");
+		modified_fname = tmp_fname;
+		while(modified_fname[0]=='/' || modified_fname[0]=='.' || modified_fname[0]=='\\'){
+			modified_fname ++;
+		}
+		while(modified_fname[i]){
+			if(modified_fname[i]=='\\' || modified_fname[i]=='/'||modified_fname[i]==' ')modified_fname[i]='.';
+			i++;
+		}
+		global_context -> SAM_output_fp = f_subr_open(modified_fname, "w");
 		if(!global_context -> SAM_output_fp)
 		{
 			SUBREADprintf("Unable to create file '%s'; the read assignment details are not written.\n", tmp_fname);
@@ -2270,6 +2342,9 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
 		global_context -> thread_contexts[xk1].line_buffer2 = malloc(global_context -> line_length + 2);
 		global_context -> thread_contexts[xk1].chro_name_buff = malloc(CHROMOSOME_NAME_LENGTH);
 		global_context -> thread_contexts[xk1].strm_buffer = malloc(sizeof(z_stream));
+		if(global_context -> calculate_overlapping_lengths)
+			global_context -> thread_contexts[xk1].read_coverage_bits = malloc((MAX_READ_LENGTH / 8 + 1)*MAX_HIT_NUMBER);
+		else global_context -> thread_contexts[xk1].read_coverage_bits = NULL;
 
 		global_context -> thread_contexts[xk1].unpaired_fragment_no = 0;
 		global_context -> thread_contexts[xk1].read_counters.assigned_reads = 0;
@@ -2308,6 +2383,8 @@ void fc_thread_destroy_thread_context(fc_thread_global_context_t * global_contex
 	for(xk1=0; xk1<global_context-> thread_number; xk1++)
 	{
 		//printf("CHRR_FREE\n");
+		if(global_context -> thread_contexts[xk1].read_coverage_bits)
+			free(global_context -> thread_contexts[xk1].read_coverage_bits);	
 		free(global_context -> thread_contexts[xk1].count_table);	
 		free(global_context -> thread_contexts[xk1].line_buffer1);	
 		free(global_context -> thread_contexts[xk1].line_buffer2);	
@@ -2700,12 +2777,14 @@ static struct option long_options[] =
 	{"readExtension5", required_argument, 0, 0},
 	{"readExtension3", required_argument, 0, 0},
 	{"read2pos", required_argument, 0, 0},
-	{"minReadOverlap", required_argument, 0, 0},
+	{"minOverlap", required_argument, 0, 0},
 	{"countSplitAlignmentsOnly", no_argument, 0, 0},
 	{"debugCommand", required_argument, 0, 0},
 	{"ignoreDup", no_argument, 0, 0},
 	{"donotsort", no_argument, 0, 0},
 	{"fraction", no_argument, 0, 0},
+	{"order", required_argument, 0, 'S'},
+	{"largestOverlap", no_argument, 0,0},
 	{0, 0, 0, 0}
 };
 
@@ -2778,6 +2857,8 @@ void print_usage()
 	SUBREADputs("    "); 
 	SUBREADputs("    -T <int>  \tNumber of the threads. 1 by default."); 
 	SUBREADputs("    "); 
+	SUBREADputs("    -v        \tOutput version of the program.");
+	SUBREADputs("    "); 
 	SUBREADputs("    -R        \tOutput read counting result for each read/fragment. For each");
 	SUBREADputs("              \tinput read file, read counting results for reads/fragments will");
 	SUBREADputs("              \tbe saved to a tab-delimited file that contains four columns");
@@ -2787,10 +2868,14 @@ void print_usage()
 	SUBREADputs("              \tthe file is the same as name of the input read file except a");
 	SUBREADputs("              \tsuffix `.featureCounts' is added.");
 	SUBREADputs("    "); 
-	SUBREADputs("    --minReadOverlap <int>      Specify the minimum number of overlapped bases");
-	SUBREADputs("              \trequired to assign a read to a feature. 1 by default. Negative");
-	SUBREADputs("              \tvalues are permitted, indicating a gap being allowed between a");
-	SUBREADputs("              \tread and a feature.");
+	SUBREADputs("    --largestOverlap            If specified, reads (or fragments) will be");
+	SUBREADputs("              \tassigned to the target that has the largest number of overlapping");
+	SUBREADputs("              \tbases.");
+	SUBREADputs("    "); 
+	SUBREADputs("    --minOverlap <int>          Specify the minimum required number of");
+	SUBREADputs("              \toverlapping bases between a read (or a fragment) and a feature.");
+	SUBREADputs("              \t1 by default. If a negative value is provided, the read will be");
+	SUBREADputs("              \textended from both ends.");
 	SUBREADputs("    "); 
 	SUBREADputs("    --readExtension5 <int>      Reads are extended upstream by <int> bases from");
 	SUBREADputs("              \ttheir 5' end."); 
@@ -2844,19 +2929,18 @@ void print_usage()
 	SUBREADputs("              \tsuccessfully aligned will be considered for summarization."); 
 	SUBREADputs("              \tThis option is only applicable for paired-end reads."); 
 	SUBREADputs("    "); 
+	SUBREADputs("    -S <ff:fr:rf> Orientation of the two read from the same pair, 'fr' by");
+	SUBREADputs("              \tby default.");
+	SUBREADputs("    "); 
 	SUBREADputs("    -C        \tIf specified, the chimeric fragments (those fragments that "); 
 	SUBREADputs("              \thave their two ends aligned to different chromosomes) will"); 
 	SUBREADputs("              \tNOT be included for summarization. This option is only "); 
 	SUBREADputs("              \tapplicable for paired-end read data."); 
 	SUBREADputs("    "); 
-	SUBREADputs("    -v        \tOutput version of the program.");
-	SUBREADputs("    "); 
 	SUBREADputs("    --donotsort   If specified, paired end reads will not be reordered even if");
         SUBREADputs("              \treads from the same pair were found not to be next to each other");
         SUBREADputs("              \tin the input. ");
 	SUBREADputs("    "); 
-
-
 }
 
 
@@ -2904,6 +2988,9 @@ int readSummary(int argc,char *argv[]){
 	30: debug_command # This is for debug only; RfeatureCounts should pass a space (" ") to this parameter, disabling the debug command.
 	31: as.numeric(is_duplicate_ignored) # 0 = INCLUDE DUPLICATE READS; 1 = IGNORE DUPLICATE READS (0x400 FLAG IS SET) ; "0" by default.
 	32: as.numeric(do_not_sort)   # 1 = NEVER SORT THE PE BAM/SAM FILES; 0 = SORT THE BAM/SAM FILE IF IT IS FOUND NOT SORTED.
+	33: as.numeric(fractionMultiMapping) # 1 = calculate fraction numbers if a read overlaps with multiple features or meta-features. "-M" must be specified when fractions are caculated.
+	34: as.numeric(useOverlappingBreakTie) # 1 = Select features or meta-features with a longer overlapping length; 0 = just use read-voting strategy: one overlapping read = 1 vote
+	35: Pair_Orientations # FF, FR, RF or RR. This parameter matters only if "-s" option is 1 or 2.
 	 */
 
 	int isStrandChecked, isCVersion, isChimericDisallowed, isPEDistChecked, minMappingQualityScore=0, isInputFileResortNeeded, feature_block_size = 20, reduce_5_3_ends_to_one;
@@ -2911,7 +2998,7 @@ int readSummary(int argc,char *argv[]){
 	long *start, *stop;
 	int *geneid;
 
-	char *nameFeatureTypeColumn, *nameGeneIDColumn,*debug_command;
+	char *nameFeatureTypeColumn, *nameGeneIDColumn,*debug_command, *pair_orientations;
 	long nexons;
 
 
@@ -2924,7 +3011,7 @@ int readSummary(int argc,char *argv[]){
 	curpos = 0;
 	curchr_name = "";
 
-	int isPE, minPEDistance, maxPEDistance, isReadSummaryReport, isBothEndRequired, isMultiMappingAllowed, fiveEndExtension, threeEndExtension, minReadOverlap, isSplitAlignmentOnly, is_duplicate_ignored, doNotSort, fractionMultiMapping;
+	int isPE, minPEDistance, maxPEDistance, isReadSummaryReport, isBothEndRequired, isMultiMappingAllowed, fiveEndExtension, threeEndExtension, minFragmentOverlap, isSplitAlignmentOnly, is_duplicate_ignored, doNotSort, fractionMultiMapping, useOverlappingBreakTie;
 
 	int isSAM, isGTF, n_input_files=0;
 	char *  alias_file_name = NULL, * cmd_rebuilt = NULL;
@@ -2994,18 +3081,28 @@ int readSummary(int argc,char *argv[]){
 	if(thread_number<1) thread_number=1;
 	if(thread_number>16)thread_number=16;
 
+	int Param_fiveEndExtension, Param_threeEndExtension;
 	if(argc>25)
-		fiveEndExtension = atoi(argv[25]);
-	else 	fiveEndExtension = 0;
+		Param_fiveEndExtension = atoi(argv[25]);
+	else    Param_fiveEndExtension = 0;
 
 	if(argc>26)
-		threeEndExtension = atoi(argv[26]);
-	else	threeEndExtension = 0;
+		Param_threeEndExtension = atoi(argv[26]);
+	else    Param_threeEndExtension = 0;
 
 	if(argc>27)
-		minReadOverlap = atoi(argv[27]);
-	else	minReadOverlap = 1;
-	
+		minFragmentOverlap = atoi(argv[27]);
+	else    minFragmentOverlap = 1;
+
+	if(minFragmentOverlap <1){
+		fiveEndExtension = 1 - minFragmentOverlap;
+		threeEndExtension = 1 - minFragmentOverlap;
+		minFragmentOverlap = 1;
+	}else{
+		fiveEndExtension = Param_fiveEndExtension;
+		threeEndExtension = Param_threeEndExtension;
+	}
+
 	if(argc>28)
 		isSplitAlignmentOnly = atoi(argv[28]);
 	else	isSplitAlignmentOnly = 0;
@@ -3035,12 +3132,20 @@ int readSummary(int argc,char *argv[]){
 	else
 		fractionMultiMapping = 0;
 
+	if(argc>34)
+		useOverlappingBreakTie = atoi(argv[34]);
+	else	useOverlappingBreakTie = 0;
+
+
+	if(argc>35)
+		pair_orientations = argv[35];
+	else	pair_orientations = "FR";
 
 	unsigned int buffer_size = 1024*1024*6;
 
 
 	fc_thread_global_context_t global_context;
-	fc_thread_init_global_context(& global_context, buffer_size, thread_number, MAX_LINE_LENGTH, isPE, minPEDistance, maxPEDistance,isGeneLevel, isMultiOverlapAllowed, isStrandChecked, (char *)argv[3] , isReadSummaryReport, isBothEndRequired, isChimericDisallowed, isPEDistChecked, nameFeatureTypeColumn, nameGeneIDColumn, minMappingQualityScore,isMultiMappingAllowed, isSAM, alias_file_name, cmd_rebuilt, isInputFileResortNeeded, feature_block_size, isCVersion, fiveEndExtension, threeEndExtens [...]
+	fc_thread_init_global_context(& global_context, buffer_size, thread_number, MAX_LINE_LENGTH, isPE, minPEDistance, maxPEDistance,isGeneLevel, isMultiOverlapAllowed, isStrandChecked, (char *)argv[3] , isReadSummaryReport, isBothEndRequired, isChimericDisallowed, isPEDistChecked, nameFeatureTypeColumn, nameGeneIDColumn, minMappingQualityScore,isMultiMappingAllowed, isSAM, alias_file_name, cmd_rebuilt, isInputFileResortNeeded, feature_block_size, isCVersion, fiveEndExtension, threeEndExtens [...]
 
 	if( global_context.is_multi_mapping_allowed != ALLOW_ALL_MULTI_MAPPING && global_context.use_fraction_multi_mapping)
 	{
@@ -3696,7 +3801,7 @@ int main(int argc, char ** argv)
 int feature_count_main(int argc, char ** argv)
 #endif
 {
-	char * Rargv[34];
+	char * Rargv[36];
 	char annot_name[300];
 	char * out_name = malloc(300);
 	char * alias_file_name = malloc(300);
@@ -3713,6 +3818,7 @@ int feature_count_main(int argc, char ** argv)
 	char min_qual_score_str[11];
 	char feature_block_size_str[11];
 	char Strand_Sensitive_Str[11];
+	char Pair_Orientations[3];
 	char * very_long_file_names;
 	int is_Input_Need_Reorder = 0;
 	int is_PE = 0;
@@ -3733,12 +3839,13 @@ int feature_count_main(int argc, char ** argv)
 	int use_fraction_multimapping = 0;
 	int threads = 1;
 	int isGTF = 1;
+	int use_overlapping_length_break_tie = 0;
 	char nthread_str[4];
 	int option_index = 0;
 	int c;
 	int very_long_file_names_size = 200;
-	int fiveEndExtension = 0, threeEndExtension = 0, minReadOverlap = 1;
-	char strFiveEndExtension[11], strThreeEndExtension[11], strMinReadOverlap[11];
+	int fiveEndExtension = 0, threeEndExtension = 0, minFragmentOverlap = 1;
+	char strFiveEndExtension[11], strThreeEndExtension[11], strMinFragmentOverlap[11];
 	very_long_file_names = malloc(very_long_file_names_size);
 	very_long_file_names [0] = 0;
 
@@ -3765,12 +3872,19 @@ int feature_count_main(int argc, char ** argv)
 	opterr=1;
 	optopt=63;
 
-	while ((c = getopt_long (argc, argv, "A:g:t:T:o:a:d:D:L:Q:pbF:fs:SCBPMORv?", long_options, &option_index)) != -1)
+	while ((c = getopt_long (argc, argv, "A:g:t:T:o:a:d:D:L:Q:pbF:fs:S:CBPMORv?", long_options, &option_index)) != -1)
 		switch(c)
 		{
 			case 'S':
-				SUBREADprintf("The '-S' option has been deprecated.\n FeatureCounts will automatically examine the read order.\n");
-				//is_Input_Need_Reorder = 1;
+				if(strlen(optarg)!=2 || (strcmp(optarg, "ff")!=0 && strcmp(optarg, "rf")!=0 && strcmp(optarg, "fr")!=0)){
+					SUBREADprintf("The order parameter can only be ff, fr or rf.\n");
+					print_usage();
+					return -1;
+				}
+				Pair_Orientations[0]=(optarg[0]=='r'?'r':'f');
+				Pair_Orientations[1]=(optarg[1]=='f'?'f':'r');
+				Pair_Orientations[2]=0;
+
 				break;
 			case 'A':
 				strcpy(alias_file_name, optarg);
@@ -3866,9 +3980,9 @@ int feature_count_main(int argc, char ** argv)
 					threeEndExtension = max(0, threeEndExtension);
 				}
 
-				if(strcmp("minReadOverlap", long_options[option_index].name)==0)
+				if(strcmp("minOverlap", long_options[option_index].name)==0)
 				{
-					minReadOverlap = atoi(optarg);
+					minFragmentOverlap = atoi(optarg);
 				}
 
 				if(strcmp("debugCommand", long_options[option_index].name)==0)
@@ -3896,6 +4010,11 @@ int feature_count_main(int argc, char ** argv)
 						
 				}				
 
+				if(strcmp("largestOverlap", long_options[option_index].name)==0)
+				{
+					use_overlapping_length_break_tie = 1;
+				}
+
 				if(strcmp("donotsort", long_options[option_index].name)==0)
 				{
 					do_not_sort = 1;
@@ -3915,11 +4034,11 @@ int feature_count_main(int argc, char ** argv)
 		}
 
 
-	if(minReadOverlap<1)
+	if(minFragmentOverlap<1)
 	{
-		fiveEndExtension = - minReadOverlap + 1;
-		threeEndExtension =  - minReadOverlap + 1;
-		minReadOverlap = 1;
+		fiveEndExtension = - minFragmentOverlap + 1;
+		threeEndExtension =  - minFragmentOverlap + 1;
+		minFragmentOverlap = 1;
 	}
 
 	if(out_name[0]==0 || annot_name[0]==0||argc == optind)
@@ -3946,7 +4065,7 @@ int feature_count_main(int argc, char ** argv)
 
 	sprintf(strFiveEndExtension, "%d", fiveEndExtension);
 	sprintf(strThreeEndExtension, "%d", threeEndExtension);
-	sprintf(strMinReadOverlap, "%d", minReadOverlap);
+	sprintf(strMinFragmentOverlap, "%d", minFragmentOverlap);
 	sprintf(nthread_str,"%d", threads);
 	sprintf(min_dist_str,"%d",min_dist);
 	sprintf(max_dist_str,"%d",max_dist);
@@ -3980,14 +4099,16 @@ int feature_count_main(int argc, char ** argv)
 	Rargv[24] = feature_block_size_str;
 	Rargv[25] = strFiveEndExtension;
 	Rargv[26] = strThreeEndExtension;
-	Rargv[27] = strMinReadOverlap;
+	Rargv[27] = strMinFragmentOverlap;
 	Rargv[28] = is_Split_Alignment_Only?"1":"0";
 	Rargv[29] = (reduce_5_3_ends_to_one == 0?"0":(reduce_5_3_ends_to_one==REDUCE_TO_3_PRIME_END?"3":"5"));
 	Rargv[30] = debug_command;
 	Rargv[31] = is_duplicate_ignored?"1":"0";
 	Rargv[32] = do_not_sort?"1":"0";
 	Rargv[33] = use_fraction_multimapping?"1":"0";
-	int retvalue = readSummary(34, Rargv);
+	Rargv[34] = use_overlapping_length_break_tie?"1":"0";
+	Rargv[35] = Pair_Orientations;
+	int retvalue = readSummary(36, Rargv);
 
 	free(very_long_file_names);
 	free(out_name);
diff --git a/src/subread.h b/src/subread.h
index 00f8b03..5955d9c 100644
--- a/src/subread.h
+++ b/src/subread.h
@@ -66,6 +66,7 @@
 #define MAX_INDEL_SECTIONS 7
 //#define XBIG_MARGIN_RECORD_SIZE 24 
 #define MAX_INSERTION_LENGTH 200
+#define FC_CIGAR_PARSER_ITEMS 9
 //#define BASE_BLOCK_LENGTH 15000000
 //#define NEED_SUBREAD_STATISTIC
 
diff --git a/test/featureCounts/result/test-minimum.FC b/test/featureCounts/result/test-minimum.FC
new file mode 100644
index 0000000..18888d0
--- /dev/null
+++ b/test/featureCounts/result/test-minimum.FC
@@ -0,0 +1,9 @@
+# Program:featureCounts v1.4.6-p4; Command:"../../bin/featureCounts" "-a" "data/test-minimum.GTF" "-o" "result/test-minimum.FC" "data/test-minimum.sam" 
+Geneid	Chr	Start	End	Strand	Length	data/test-minimum.sam
+simu_gene1	chr3;chr3;chr3	100;20000;40000	10000;30000;89000	+;+;+	68903	15
+simu_gene2	chr3;chr3	100010;102000	101000;131000	+;+	29992	4
+simu_gene3	chr3;chr3;chr3;chr3	500010;502000;504000;600000	501000;503000;529000;669000	-;-;-;-	95994	10
+simu_gene4	chr3;chr3;chr3	602000;672000;702000	631000;699000;719000	+;+;+	73003	2
+simu_gene5	chr4;chr4;chr4;chr4	20000;120000;200000;220000	100000;190000;210000;300000	-;-;-;-	240004	74
+simu_gene6	chr4;chr4	420000;500000	490000;560000	-;-	130002	30
+simu_gene7	chr5;chr5;chr5	120000;500000;970000	490000;960000;1000000	-;-;-	860003	254
diff --git a/test/featureCounts/result/test-minimum.FC.summary b/test/featureCounts/result/test-minimum.FC.summary
new file mode 100644
index 0000000..4df639a
--- /dev/null
+++ b/test/featureCounts/result/test-minimum.FC.summary
@@ -0,0 +1,12 @@
+Status	data/test-minimum.sam
+Assigned	389
+Unassigned_Ambiguity	2
+Unassigned_MultiMapping	191
+Unassigned_NoFeatures	416
+Unassigned_Unmapped	0
+Unassigned_MappingQuality	0
+Unassigned_FragmentLength	0
+Unassigned_Chimera	0
+Unassigned_Secondary	0
+Unassigned_Nonjunction	0
+Unassigned_Duplicate	0

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/subread.git



More information about the debian-med-commit mailing list